There are two possible directories containing sequence data: * /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL ** this directory is for running the code on UVA’s HPC Rivanna
(you can easily switch between these two directories by selecting the old path, up to & including Bioinformatics, and hit Cmd F to bring up Find & Replace tool, then copy-paste the new path into the Replace box and hit All. There should be XX replacements)
/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL *Ns & primers present (raw files)
/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/filtN *Ns removed, primers present (pre-filtered)
/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt *Ns & primers removed (cutadapted)
/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered *Ns & primers removed and filter & trimmed (filtered)
#install packages with BiocManager (if you have anaconda)
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("dada2", version = "3.16")
# BiocManager::install(c("DECIPHER", "ShortRead", "phyloseq"))
# BiocManager::install("decontam")
library(devtools); packageVersion("devtools")
## Loading required package: usethis
## [1] '2.4.5'
library(dada2); packageVersion("dada2")
## Loading required package: Rcpp
## [1] '1.32.0'
library(ShortRead); packageVersion("ShortRead")
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
## Loading required package: BiocParallel
## Loading required package: Biostrings
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: XVector
## Loading required package: GenomeInfoDb
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## Loading required package: Rsamtools
## Loading required package: GenomicRanges
## Loading required package: GenomicAlignments
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## [1] '1.62.0'
library(Biostrings); packageVersion("Biostrings")
## [1] '2.72.1'
library(DECIPHER); packageVersion("DECIPHER")
## [1] '3.0.0'
library(phyloseq); packageVersion("phyloseq")
##
## Attaching package: 'phyloseq'
## The following object is masked from 'package:SummarizedExperiment':
##
## distance
## The following object is masked from 'package:Biobase':
##
## sampleNames
## The following object is masked from 'package:GenomicRanges':
##
## distance
## The following object is masked from 'package:IRanges':
##
## distance
## [1] '1.48.0'
library(ggplot2); packageVersion("ggplot2")
## [1] '3.5.1'
#library(decontam); packageVersion("decontam")
#devtools::install_github("benjjneb/dada2", ref="v1.16") # change the ref argument to get other versions
# Directory holding the raw (unzipped) fastq files; the other paths in this script are derived from it.
path <- "/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL" ## CHANGE ME to the directory containing the fastq files.
# Keep the working directory in sync with `path` (defined once, so the two cannot drift apart).
setwd(path)
# `pattern` is a regular expression, not a shell glob: escape the dot and anchor the
# extension so that only files whose names end in ".fastq" are listed.
head(list.files(path, pattern = "\\.fastq$"))
## [1] "rbcL-2020-6-16-H1_S293_L001_R1_001.fastq"
## [2] "rbcL-2020-6-16-H1_S293_L001_R2_001.fastq"
## [3] "rbcL-2020-6-16-H5_S294_L001_R1_001.fastq"
## [4] "rbcL-2020-6-16-H5_S294_L001_R2_001.fastq"
## [5] "rbcL-2020-6-16-H6_S295_L001_R1_001.fastq"
## [6] "rbcL-2020-6-16-H6_S295_L001_R2_001.fastq"
list.files(path, pattern = "*.fastq")[1]
## [1] "rbcL-2020-6-16-H1_S293_L001_R1_001.fastq"
#R.utils::gunzip(list.files(path), remove=F)
R.utils::isGzipped(list.files(path, pattern = "*.fastq")[1]) # checking that the file is unzipped, FALSE = not gzipped
## [1] FALSE
# install R.utils
# library(R.utils)
# lapply(list.files(path, pattern = "*.gz"), FUN=gunzip, remove=F) # unzip all .gz files and don't remove compressed files
# I manually moved all compressed files into a new folder, leaving these unzipped files in the working directory for this script
# commenting out since I only need to unzip once
Match forward and reverse reads by sample name. Pre-filter to remove reads with Ns.
Forward and reverse fastq files have the format: rbcL-SAMPLENAME_SXXX_L001_R1_001.fastq and rbcL-SAMPLENAME_SXXX_L001_R2_001.fastq, respectively.
For example: rbcL-2020-6-16-H1_S293_L001_R1_001.fastq is the forward reads of rbcL sample 2020-06-16-H1
# Raw read files as full paths: R1 = forward reads, R2 = reverse reads.
# Each listing is sorted; the pipeline below pairs element i of fnFs with element i of fnRs.
fnFs <- list.files(path, pattern = "L001_R1_001.fastq", full.names = TRUE)
fnFs <- sort(fnFs)
fnRs <- list.files(path, pattern = "L001_R2_001.fastq", full.names = TRUE)
fnRs <- sort(fnRs)
#string parsing may have to be altered in your own data if your file names have a different format.
Ambiguous bases (Ns) in the sequencing reads make accurate mapping of short primer sequences difficult. Here, remove reads with Ns, but perform no other filtering.
fnFs.filtN <- file.path(path, "filtN", basename(fnFs)) # destination paths for the N-filtered forward reads: same file names, inside the filtN/ subdirectory of path
fnRs.filtN <- file.path(path, "filtN", basename(fnRs)) # ...and the matching destinations for the N-filtered reverse reads
Now we can run the pre-filter. At this stage the call only removes reads that contain Ns (maxN = 0); quality-based filtering and trimming happen later, after the primers have been removed. This function takes files from path /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL and creates new files in filtN folder /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/filtN
# Pre-filter: write copies of the raw reads to filtN/ after dropping every read that contains an N (maxN = 0).
# matchIDs = TRUE enforces matching read IDs between the forward and reverse files;
# compress = FALSE writes plain-text fastq so that cutadapt can read the output.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can be reassigned.
filterAndTrim(fnFs, fnFs.filtN, rev = fnRs, filt.rev = fnRs.filtN, maxN = 0,
              multithread = TRUE, matchIDs = TRUE, compress = FALSE) #eliminates sequences with more than 0 Ns
## Some input samples had no reads pass the filter.
#I had an issue with "Mismatched forward and reverse sequence files", but adding the matchIDs = TRUE parameter fixed it. #I also had an issue with the filtN files being compressed, so the cutadapt command couldn't read them ("UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte"); adding the compress = FALSE parameter fixed it (alternative suggested by benjjneb: "Or you could just gzip all your files at the beginning.")
# Quality profiles of the first sample, before the N pre-filter...
plotQualityProfile(fnFs[1]) # forward reads, raw
plotQualityProfile(fnRs[1]) # reverse reads, raw
# ...and after it
plotQualityProfile(fnFs.filtN[1]) # forward reads, Ns removed
plotQualityProfile(fnRs.filtN[1]) # reverse reads, Ns removed
Not every sample made it through the pre-filter to remove reads with Ns
length(fnFs) # one filtN destination was requested per raw forward file (261)
## [1] 261
length(list.files(file.path(path, "filtN"), pattern = "L001_R1_001.fastq")) # forward files actually written to filtN/ (258)
## [1] 258
# Re-point both vectors at the files that exist on disk, since not all samples made it through the filter
fnFs.filtN <- list.files(file.path(path, "filtN"), pattern = "L001_R1_001.fastq", full.names = TRUE)
fnRs.filtN <- list.files(file.path(path, "filtN"), pattern = "L001_R2_001.fastq", full.names = TRUE)
#rbcL primers
FWD <- "TGGCAGCATTYCGAGTAACTC" ## CHANGE ME to your forward primer sequence
REV <- "GTAAAATCAAGTCCACCRCG" ## CHANGE ME...
#to ensure we have the right primers, and the correct orientation of the primers on the reads, we will verify the presence and orientation of these primers in the data
# Build every orientation of a primer sequence.
#
# Args:
#   primer: a single primer sequence as a character string.
# Returns:
#   A named character vector with the elements Forward, Complement, Reverse
#   and RevComp.
allOrients <- function(primer) {
  # library() rather than require(): stop with a clear error if Biostrings is
  # unavailable instead of continuing after a FALSE return value.
  library(Biostrings)
  dna <- DNAString(primer) # Biostrings works with DNAString objects rather than character vectors
  orients <- c(Forward = dna, Complement = complement(dna), Reverse = reverse(dna),
               RevComp = reverseComplement(dna))
  # vapply() pins the result to one string per orientation (sapply() can change its return type)
  vapply(orients, toString, character(1))
}
FWD.orients <- allOrients(FWD)
REV.orients <- allOrients(REV)
FWD.orients #all possible orientations of forward
## Forward Complement Reverse
## "TGGCAGCATTYCGAGTAACTC" "ACCGTCGTAARGCTCATTGAG" "CTCAATGAGCYTTACGACGGT"
## RevComp
## "GAGTTACTCGRAATGCTGCCA"
REV.orients #...and reverse primers
## Forward Complement Reverse
## "GTAAAATCAAGTCCACCRCG" "CATTTTAGTTCAGGTGGYGC" "GCRCCACCTGAACTAAAATG"
## RevComp
## "CGYGGTGGACTTGATTTTAC"
We are now ready to count the number of times the primers appear in the forward and reverse read, while considering all possible primer orientations. Identifying and counting the primers on one set of paired end FASTQ files is sufficient, assuming all the files were created using the same library preparation, so we’ll just process the first sample.
# Count the reads in one fastq file that contain a given primer at least once.
#
# Args:
#   primer: primer sequence to search for (character string).
#   fn: path to the fastq file to scan.
# Returns:
#   The number of reads with one or more matches.
primerHits <- function(primer, fn) {
  reads <- sread(readFastq(fn))
  # fixed = FALSE is passed through to vcountPattern() unchanged
  hits.per.read <- vcountPattern(primer, reads, fixed = FALSE)
  sum(hits.per.read > 0)
}
# Primer counts for the first N-filtered sample: one row per primer/read-direction
# combination, one column per primer orientation.
local({
  first.fwd <- fnFs.filtN[[1]]
  first.rev <- fnRs.filtN[[1]]
  rbind(
    FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = first.fwd),
    FWD.ReverseReads = sapply(FWD.orients, primerHits, fn = first.rev),
    REV.ForwardReads = sapply(REV.orients, primerHits, fn = first.fwd),
    REV.ReverseReads = sapply(REV.orients, primerHits, fn = first.rev)
  )
})
## Forward Complement Reverse RevComp
## FWD.ForwardReads 15698 0 0 0
## FWD.ReverseReads 0 0 0 0
## REV.ForwardReads 0 0 0 0
## REV.ReverseReads 13318 0 0 0
Note: Orientation mixups are a common trip-up. If, for example, the REV primer is matching the Reverse reads in its RevComp orientation, then replace REV with its reverse-complement orientation (REV <- REV.orients[["RevComp"]]) before proceeding.
These primers can be now removed using a specialized primer/adapter removal tool. Here, we use cutadapt for this purpose. Download, installation and usage instructions are available online: http://cutadapt.readthedocs.io/en/stable/index.html
#cutadapt <- "/Users/kelseyschoenemann/opt/anaconda3/envs/cutadaptenv/bin/cutadapt" #CHANGE ME to the cutadapt path on your local machine
cutadapt <- "/home/kls7sg/.local/bin/cutadapt" #for running on Rivanna HPC
system2(cutadapt, args = "--version") # Run shell commands from R
If the above command successfully executed, R has found cutadapt and you are ready to continue following along.
We now create output filenames for the cutadapt-ed files, and define the parameters we are going to give the cutadapt command. The critical parameters are the primers, and they need to be in the right orientation, i.e. the FWD primer should have been matching the forward-reads in its forward orientation, and the REV primer should have been matching the reverse-reads in its forward orientation.
# Folder for the primer-trimmed reads: a "cutadapt" subdirectory of the main directory, created if it is missing
path.cut <- file.path(path, "cutadapt")
if (!dir.exists(path.cut)) {
  dir.create(path.cut)
}
#/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt
# fnFs.cut <- file.path(path.cut, basename(fnFs)) #old code #to place fwd reads w/o primers in the new cutadapt directory
# fnRs.cut <- file.path(path.cut, basename(fnRs)) #old code
length(file.path(path, basename(fnFs))) # 261 samples with F reads in original directory
## [1] 261
length(list.files(file.path(path, "filtN"), pattern = "L001_R1_001.fastq", full.names = TRUE)) # but only 258 samples passed the filtN filter (removing reads with Ns)
## [1] 258
# figuring out how to create/call directory paths with just retained samples
# basename(list.files(file.path(path, "filtN"), pattern = "L001_R1_001.fastq", full.names = TRUE))[1]
# file.path(path.cut, basename(list.files(file.path(path, "filtN"), pattern = "L001_R1_001.fastq", full.names = TRUE))[1])
# here's an updated directory that only includes destinations for samples/files that still exist
# Output paths for the primer-trimmed reads, derived directly from the cutadapt INPUT vectors.
# Element i of fnFs.cut/fnRs.cut is therefore always the destination for fnFs.filtN[i]/fnRs.filtN[i];
# building them from a separately sorted directory listing (with a different pattern) gave no such guarantee.
fnFs.cut <- file.path(path.cut, basename(fnFs.filtN)) #to place forward reads with primers cut (removed) in the new cutadapt directory
fnRs.cut <- file.path(path.cut, basename(fnRs.filtN)) #to place reverse reads with primers cut (removed) in the new cutadapt directory
# Reverse complements of both primers, via the exported dada2::rc() (no need to reach into the namespace with `:::`).
FWD.RC <- dada2::rc(FWD) #generate reverse complement of fwd
REV.RC <- dada2::rc(REV) #...and rev primers
# cutadapt options: -g/-a apply to the forward reads (R1), -G/-A to the reverse reads (R2)
R1.flags <- paste("-g", FWD, "-a", REV.RC) # To flag FWD and reverse-complement of REV for removal from forward reads (R1)
R2.flags <- paste("-G", REV, "-A", FWD.RC) # To flag REV and reverse-complement of FWD for removal from reverse reads (R2)
# Run Cutadapt to cut flagged sequences from input reads and save cut sequences to output folder
#Warning: A lot of output will be written to the console by cutadapt!
# Trim primers from every N-filtered read pair with cutadapt.
# Iterate over the files that exist in filtN/ (fnFs.filtN), not over the raw inputs (fnFs):
# samples dropped by the pre-filter have no filtN file, so indexing by fnFs runs past the end
# of the filtN/cutadapt vectors and hands cutadapt "NA" as file names.
stopifnot(
  length(fnFs.filtN) == length(fnRs.filtN),
  length(fnFs.filtN) == length(fnFs.cut),
  length(fnFs.filtN) == length(fnRs.cut)
)
for (i in seq_along(fnFs.filtN)) {
  # File paths are shell-quoted so that directories containing spaces still work.
  status <- system2(cutadapt, args = c(
    R1.flags, R2.flags, "-n", 2, # -n 2 required to remove FWD & REV from reads
    "-o", shQuote(fnFs.cut[i]), "-p", shQuote(fnRs.cut[i]), # output files
    shQuote(fnFs.filtN[i]), shQuote(fnRs.filtN[i]) # input files
  ))
  # system2() returns the exit status; surface failures instead of ignoring them.
  if (status != 0) {
    warning("cutadapt exited with status ", status, " for ", basename(fnFs.filtN[i]),
            call. = FALSE)
  }
}
As a sanity check, we will count the presence of primers in the first cutadapt-ed sample:
# Same primer count table as before, now for the first cutadapt-ed sample.
local({
  first.fwd <- fnFs.cut[[1]]
  first.rev <- fnRs.cut[[1]]
  rbind(
    FWD.ForwardReads = sapply(FWD.orients, primerHits, fn = first.fwd),
    FWD.ReverseReads = sapply(FWD.orients, primerHits, fn = first.rev),
    REV.ForwardReads = sapply(REV.orients, primerHits, fn = first.fwd),
    REV.ReverseReads = sapply(REV.orients, primerHits, fn = first.rev)
  )
})
## Forward Complement Reverse RevComp
## FWD.ForwardReads 0 0 0 0
## FWD.ReverseReads 0 0 0 0
## REV.ForwardReads 0 0 0 0
## REV.ReverseReads 0 0 0 0
Success! Primers are no longer detected in the cutadapted reads
The primer-free sequence files are now ready to be analyzed through the DADA2 pipeline.
#Prep the pre-filtered & “cutadapted” sequence reads
#the only thing changing from last time is 'path' becomes 'path.cut'
#fnRs <- sort(list.files(path, pattern = "_2.fastq.gz", full.names = TRUE))
cutFs <- sort(list.files(path.cut, pattern = "L001_R1_001.fastq", full.names = TRUE))
cutRs <- sort(list.files(path.cut, pattern = "L001_R2_001.fastq", full.names = TRUE))
To store the output files of filtered reads as fastq.gz files, we’re creating another directory /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered
# Destination paths for the quality-filtered reads: same file names as the cutadapt outputs, inside <path.cut>/filtered.
# NOTE(review): these names end in ".fastq", yet the filterAndTrim() call that writes them uses compress = TRUE,
# so the files are presumably gzip-compressed despite lacking a ".gz" suffix — confirm this is intended.
filtFs <- file.path(path.cut, "filtered", basename(cutFs))
filtRs <- file.path(path.cut, "filtered", basename(cutRs))
#recall, the PRE-filter filter: filterAndTrim(fnFs, fnFs.filtN, fnRs, fnRs.filtN, maxN = 0, multithread = TRUE, matchIDs = T, compress=F) #eliminates sequences with more than 0 Ns
#NOW we filter for more stringent Quality Control
out <- filterAndTrim(cutFs, filtFs, cutRs, filtRs, maxN = 0, maxEE = c(2, 2), minLen = 50, rm.phix = TRUE, compress = TRUE, multithread = TRUE)
## Some input samples had no reads pass the filter.
head(out)
## reads.in reads.out
## rbcL-2020-6-16-H1_S293_L001_R1_001.fastq 16617 9241
## rbcL-2020-6-16-H5_S294_L001_R1_001.fastq 9459 5929
## rbcL-2020-6-16-H6_S295_L001_R1_001.fastq 1296 850
## rbcL-2020-6-17-H2_S296_L001_R1_001.fastq 6917 4559
## rbcL-2020-6-17-H4_S297_L001_R1_001.fastq 3027 2027
## rbcL-2020-6-17-H8_S298_L001_R1_001.fastq 24212 15437
# Per-sample read retention: the third column is the percentage of input reads that passed the filter.
out.table <- as.data.frame(cbind(out, (out[, 2] / out[, 1]) * 100))
# Mean percentage of reads LOST per sample; na.rm = TRUE skips samples whose ratio is undefined (0 reads in).
100 - mean((out[, 2] / out[, 1]) * 100, na.rm = TRUE) #lose ~40% of reads on average (i.e. ~60% are retained)
## [1] 40.16607
For this dataset, we will use standard filtering parameters:
# Index of the sample to inspect
n <- 20
# Forward-read quality profile of the nth sample before the quality filter (cutadapt output)...
plotQualityProfile(cutFs[n])
# ...and after it (filterAndTrim output)
plotQualityProfile(filtFs[n])
# updating sample names for "out"
length(rownames(as.data.frame(out)))
## [1] 258
rownames(as.data.frame(out))
## [1] "rbcL-2020-6-16-H1_S293_L001_R1_001.fastq"
## [2] "rbcL-2020-6-16-H5_S294_L001_R1_001.fastq"
## [3] "rbcL-2020-6-16-H6_S295_L001_R1_001.fastq"
## [4] "rbcL-2020-6-17-H2_S296_L001_R1_001.fastq"
## [5] "rbcL-2020-6-17-H4_S297_L001_R1_001.fastq"
## [6] "rbcL-2020-6-17-H8_S298_L001_R1_001.fastq"
## [7] "rbcL-2020-6-18-H3_S299_L001_R1_001.fastq"
## [8] "rbcL-2020-6-18-H7_S300_L001_R1_001.fastq"
## [9] "rbcL-2020-6-18-H9_S301_L001_R1_001.fastq"
## [10] "rbcL-2020-6-3-H1_S302_L001_R1_001.fastq"
## [11] "rbcL-2020-6-3-H5_S303_L001_R1_001.fastq"
## [12] "rbcL-2020-6-3-H6_S304_L001_R1_001.fastq"
## [13] "rbcL-2020-6-30-H1_S305_L001_R1_001.fastq"
## [14] "rbcL-2020-6-30-H5_S306_L001_R1_001.fastq"
## [15] "rbcL-2020-6-30-H6_S307_L001_R1_001.fastq"
## [16] "rbcL-2020-6-4-H2_S308_L001_R1_001.fastq"
## [17] "rbcL-2020-6-4-H4_S309_L001_R1_001.fastq"
## [18] "rbcL-2020-6-4-H8_S310_L001_R1_001.fastq"
## [19] "rbcL-2020-6-5-H3_S311_L001_R1_001.fastq"
## [20] "rbcL-2020-6-5-H7_S312_L001_R1_001.fastq"
## [21] "rbcL-2020-6-5-H9_S313_L001_R1_001.fastq"
## [22] "rbcL-2020-7-1-H2_S314_L001_R1_001.fastq"
## [23] "rbcL-2020-7-1-H4_S315_L001_R1_001.fastq"
## [24] "rbcL-2020-7-1-H8_S316_L001_R1_001.fastq"
## [25] "rbcL-2020-7-14-H1_S317_L001_R1_001.fastq"
## [26] "rbcL-2020-7-14-H5_S318_L001_R1_001.fastq"
## [27] "rbcL-2020-7-14-H6_S319_L001_R1_001.fastq"
## [28] "rbcL-2020-7-15-H4_S321_L001_R1_001.fastq"
## [29] "rbcL-2020-7-15-H8_S322_L001_R1_001.fastq"
## [30] "rbcL-2020-7-16-H3_S323_L001_R1_001.fastq"
## [31] "rbcL-2020-7-16-H7_S324_L001_R1_001.fastq"
## [32] "rbcL-2020-7-16-H9_S325_L001_R1_001.fastq"
## [33] "rbcL-2020-7-2-H3_S326_L001_R1_001.fastq"
## [34] "rbcL-2020-7-2-H7_S327_L001_R1_001.fastq"
## [35] "rbcL-2020-7-2-H9_S328_L001_R1_001.fastq"
## [36] "rbcL-2021-6-13-H1_S329_L001_R1_001.fastq"
## [37] "rbcL-2021-6-13-H3_S330_L001_R1_001.fastq"
## [38] "rbcL-2021-6-14-H11_S331_L001_R1_001.fastq"
## [39] "rbcL-2021-6-14-H6_S332_L001_R1_001.fastq"
## [40] "rbcL-2021-6-14-H7_S333_L001_R1_001.fastq"
## [41] "rbcL-2021-6-15-H8_S334_L001_R1_001.fastq"
## [42] "rbcL-2021-6-21-H10_S335_L001_R1_001.fastq"
## [43] "rbcL-2021-6-21-H12_S336_L001_R1_001.fastq"
## [44] "rbcL-2021-6-21-H9_S337_L001_R1_001.fastq"
## [45] "rbcL-2021-6-27-H21_S338_L001_R1_001.fastq"
## [46] "rbcL-2021-6-27-H22_S339_L001_R1_001.fastq"
## [47] "rbcL-2021-6-27-H27_S340_L001_R1_001.fastq"
## [48] "rbcL-2021-6-28-H25_S341_L001_R1_001.fastq"
## [49] "rbcL-2021-6-28-H26_S342_L001_R1_001.fastq"
## [50] "rbcL-2021-6-28-H28_S343_L001_R1_001.fastq"
## [51] "rbcL-2021-6-29-H17_S344_L001_R1_001.fastq"
## [52] "rbcL-2021-6-29-H23_S345_L001_R1_001.fastq"
## [53] "rbcL-2021-6-29-H24_S346_L001_R1_001.fastq"
## [54] "rbcL-2021-6-4-H21_S347_L001_R1_001.fastq"
## [55] "rbcL-2021-6-4-H22_S348_L001_R1_001.fastq"
## [56] "rbcL-2021-6-4-H27_S349_L001_R1_001.fastq"
## [57] "rbcL-2021-6-5-H18_S350_L001_R1_001.fastq"
## [58] "rbcL-2021-6-5-H25_S351_L001_R1_001.fastq"
## [59] "rbcL-2021-6-5-H26_S352_L001_R1_001.fastq"
## [60] "rbcL-2021-6-6-H17_S353_L001_R1_001.fastq"
## [61] "rbcL-2021-6-6-H24_S354_L001_R1_001.fastq"
## [62] "rbcL-2021-6-7-H23_S355_L001_R1_001.fastq"
## [63] "rbcL-2021-7-14-H10_S356_L001_R1_001.fastq"
## [64] "rbcL-2021-7-14-H12_S357_L001_R1_001.fastq"
## [65] "rbcL-2021-7-20-H27_S358_L001_R1_001.fastq"
## [66] "rbcL-2021-7-21-H25_S359_L001_R1_001.fastq"
## [67] "rbcL-2021-7-21-H26_S360_L001_R1_001.fastq"
## [68] "rbcL-2021-7-6-H11_S362_L001_R1_001.fastq"
## [69] "rbcL-2021-7-6-H6_S364_L001_R1_001.fastq"
## [70] "rbcL-2021-7-7-H4_S365_L001_R1_001.fastq"
## [71] "rbcL-2021-7-7-H8_S366_L001_R1_001.fastq"
## [72] "rbcL-2021-7-8-H3_S367_L001_R1_001.fastq"
## [73] "rbcL-2023-6-12-H3_S368_L001_R1_001.fastq"
## [74] "rbcL-2023-6-12-H5_S369_L001_R1_001.fastq"
## [75] "rbcL-2023-6-12-H7_S370_L001_R1_001.fastq"
## [76] "rbcL-2023-6-13-H6_S371_L001_R1_001.fastq"
## [77] "rbcL-2023-6-13-H8_S372_L001_R1_001.fastq"
## [78] "rbcL-2023-6-13-H9_S373_L001_R1_001.fastq"
## [79] "rbcL-2023-6-14-H3_S374_L001_R1_001.fastq"
## [80] "rbcL-2023-6-14-H7_S375_L001_R1_001.fastq"
## [81] "rbcL-2023-6-14-H9_S376_L001_R1_001.fastq"
## [82] "rbcL-2023-6-16-H5_S377_L001_R1_001.fastq"
## [83] "rbcL-2023-6-24-H6_S378_L001_R1_001.fastq"
## [84] "rbcL-2023-6-24-H8_S379_L001_R1_001.fastq"
## [85] "rbcL-2023-6-25-H2_S380_L001_R1_001.fastq"
## [86] "rbcL-2023-6-25-H4_S381_L001_R1_001.fastq"
## [87] "rbcL-2023-6-26-H1_S382_L001_R1_001.fastq"
## [88] "rbcL-2023-6-26-H7_S383_L001_R1_001.fastq"
## [89] "rbcL-2023-6-27-H3_S384_L001_R1_001.fastq"
## [90] "rbcL-2023-6-27-H5_S385_L001_R1_001.fastq"
## [91] "rbcL-2023-6-8-H1_S386_L001_R1_001.fastq"
## [92] "rbcL-2023-6-8-H2_S387_L001_R1_001.fastq"
## [93] "rbcL-2023-6-8-H4_S388_L001_R1_001.fastq"
## [94] "rbcL-2023-6-9-H2_S389_L001_R1_001.fastq"
## [95] "rbcL-2023-6-9-H4_S390_L001_R1_001.fastq"
## [96] "rbcL-2023-7-15-H6_S391_L001_R1_001.fastq"
## [97] "rbcL-2023-7-16-H4_S392_L001_R1_001.fastq"
## [98] "rbcL-2023-7-17-H1_S393_L001_R1_001.fastq"
## [99] "rbcL-2023-7-18-H3_S394_L001_R1_001.fastq"
## [100] "rbcL-2023-7-18-H7_S395_L001_R1_001.fastq"
## [101] "rbcL-2023-7-29-H5_S396_L001_R1_001.fastq"
## [102] "rbcL-2023-7-29-H7_S397_L001_R1_001.fastq"
## [103] "rbcL-2023-7-30-H8_S398_L001_R1_001.fastq"
## [104] "rbcL-2023-7-30-H9_S399_L001_R1_001.fastq"
## [105] "rbcL-2023-7-5-H1_S400_L001_R1_001.fastq"
## [106] "rbcL-2023-7-5-H2_S401_L001_R1_001.fastq"
## [107] "rbcL-2023-7-5-H4_S402_L001_R1_001.fastq"
## [108] "rbcL-2023-7-6-H6_S403_L001_R1_001.fastq"
## [109] "rbcL-2023-7-6-H8_S404_L001_R1_001.fastq"
## [110] "rbcL-2023-7-6-H9_S405_L001_R1_001.fastq"
## [111] "rbcL-2023-7-8-H3_S406_L001_R1_001.fastq"
## [112] "rbcL-2023-7-8-H5_S407_L001_R1_001.fastq"
## [113] "rbcL-2023-7-8-H7_S408_L001_R1_001.fastq"
## [114] "rbcL-2023-8-4-H2_S409_L001_R1_001.fastq"
## [115] "rbcL-2023-8-4-H5_S410_L001_R1_001.fastq"
## [116] "rbcL-2023-8-4-H6_S411_L001_R1_001.fastq"
## [117] "rbcL-2023-8-4-H7_S412_L001_R1_001.fastq"
## [118] "rbcL-2023-8-4-H8_S413_L001_R1_001.fastq"
## [119] "rbcL-2023-8-4-H9_S414_L001_R1_001.fastq"
## [120] "rbcL-Ba001_S415_L001_R1_001.fastq"
## [121] "rbcL-Ba002_S416_L001_R1_001.fastq"
## [122] "rbcL-Ba003_S417_L001_R1_001.fastq"
## [123] "rbcL-Bb001_S418_L001_R1_001.fastq"
## [124] "rbcL-Bb002_S419_L001_R1_001.fastq"
## [125] "rbcL-Bb003_S420_L001_R1_001.fastq"
## [126] "rbcL-Bb004_S421_L001_R1_001.fastq"
## [127] "rbcL-Bb005_S422_L001_R1_001.fastq"
## [128] "rbcL-Bb007_S423_L001_R1_001.fastq"
## [129] "rbcL-Bb008_S424_L001_R1_001.fastq"
## [130] "rbcL-Bb009_S425_L001_R1_001.fastq"
## [131] "rbcL-Bb010_S426_L001_R1_001.fastq"
## [132] "rbcL-Bb011_S427_L001_R1_001.fastq"
## [133] "rbcL-Bb012_S428_L001_R1_001.fastq"
## [134] "rbcL-Bb013_S429_L001_R1_001.fastq"
## [135] "rbcL-Bb014_S430_L001_R1_001.fastq"
## [136] "rbcL-Bb015_S431_L001_R1_001.fastq"
## [137] "rbcL-Bb016_S432_L001_R1_001.fastq"
## [138] "rbcL-Bb017_S433_L001_R1_001.fastq"
## [139] "rbcL-Bb018_S434_L001_R1_001.fastq"
## [140] "rbcL-Bb019_S435_L001_R1_001.fastq"
## [141] "rbcL-Bb020_S436_L001_R1_001.fastq"
## [142] "rbcL-Bb021_S437_L001_R1_001.fastq"
## [143] "rbcL-Bb022_S438_L001_R1_001.fastq"
## [144] "rbcL-Bb023_S439_L001_R1_001.fastq"
## [145] "rbcL-Bb024_S440_L001_R1_001.fastq"
## [146] "rbcL-Bb025_S441_L001_R1_001.fastq"
## [147] "rbcL-Bf001_S442_L001_R1_001.fastq"
## [148] "rbcL-Bf002_S443_L001_R1_001.fastq"
## [149] "rbcL-Bf003_S444_L001_R1_001.fastq"
## [150] "rbcL-Bf004_S445_L001_R1_001.fastq"
## [151] "rbcL-Bg001_S446_L001_R1_001.fastq"
## [152] "rbcL-Bg002_S447_L001_R1_001.fastq"
## [153] "rbcL-Bg003_S448_L001_R1_001.fastq"
## [154] "rbcL-Bg004_S449_L001_R1_001.fastq"
## [155] "rbcL-Bg005_S450_L001_R1_001.fastq"
## [156] "rbcL-Bg006_S451_L001_R1_001.fastq"
## [157] "rbcL-Bg007_S452_L001_R1_001.fastq"
## [158] "rbcL-Bg008_S453_L001_R1_001.fastq"
## [159] "rbcL-Bg009_S454_L001_R1_001.fastq"
## [160] "rbcL-Bg010_S455_L001_R1_001.fastq"
## [161] "rbcL-Bg011_S456_L001_R1_001.fastq"
## [162] "rbcL-Bg012_S457_L001_R1_001.fastq"
## [163] "rbcL-Bg013_S458_L001_R1_001.fastq"
## [164] "rbcL-Bg014_S459_L001_R1_001.fastq"
## [165] "rbcL-Bg015_S460_L001_R1_001.fastq"
## [166] "rbcL-Bg016_S461_L001_R1_001.fastq"
## [167] "rbcL-Bg017_S462_L001_R1_001.fastq"
## [168] "rbcL-Bg018_S463_L001_R1_001.fastq"
## [169] "rbcL-Bg019_S464_L001_R1_001.fastq"
## [170] "rbcL-Bi001_S465_L001_R1_001.fastq"
## [171] "rbcL-Bi002_S466_L001_R1_001.fastq"
## [172] "rbcL-Bi003_S467_L001_R1_001.fastq"
## [173] "rbcL-Bi004_S468_L001_R1_001.fastq"
## [174] "rbcL-Bi005_S469_L001_R1_001.fastq"
## [175] "rbcL-Bi006_S470_L001_R1_001.fastq"
## [176] "rbcL-Bi007_S471_L001_R1_001.fastq"
## [177] "rbcL-CKC0001_S472_L001_R1_001.fastq"
## [178] "rbcL-ESE0004_S473_L001_R1_001.fastq"
## [179] "rbcL-ext-neg-ctrl-20230909_S474_L001_R1_001.fastq"
## [180] "rbcL-ext-neg-ctrl-20230923_S475_L001_R1_001.fastq"
## [181] "rbcL-ext-neg-ctrl-20230924_S476_L001_R1_001.fastq"
## [182] "rbcL-ext-neg-ctrl-20231007_S477_L001_R1_001.fastq"
## [183] "rbcL-ext-neg-ctrl-20231008_S478_L001_R1_001.fastq"
## [184] "rbcL-ext-neg-ctrl-20231009_S479_L001_R1_001.fastq"
## [185] "rbcL-ext-neg-ctrl-2024220A_S480_L001_R1_001.fastq"
## [186] "rbcL-ext-neg-ctrl-2024220B_S481_L001_R1_001.fastq"
## [187] "rbcL-ext-neg-ctrl-2024221A_S482_L001_R1_001.fastq"
## [188] "rbcL-ext-neg-ctrl-2024221B_S483_L001_R1_001.fastq"
## [189] "rbcL-ext-neg-ctrl-2024222A_S484_L001_R1_001.fastq"
## [190] "rbcL-ext-neg-ctrl-2024222B_S485_L001_R1_001.fastq"
## [191] "rbcL-ext-neg-ctrl-2024312A_S486_L001_R1_001.fastq"
## [192] "rbcL-ext-neg-ctrl-2024312B_S487_L001_R1_001.fastq"
## [193] "rbcL-ext-neg-ctrl-2024314A_S488_L001_R1_001.fastq"
## [194] "rbcL-ext-neg-ctrl-2024314B_S489_L001_R1_001.fastq"
## [195] "rbcL-ext-neg-ctrl-2024319_S490_L001_R1_001.fastq"
## [196] "rbcL-ext-neg-ctrl-2024320_S491_L001_R1_001.fastq"
## [197] "rbcL-KLS0007_S492_L001_R1_001.fastq"
## [198] "rbcL-KLS0027_S494_L001_R1_001.fastq"
## [199] "rbcL-KLS0044_S495_L001_R1_001.fastq"
## [200] "rbcL-KLS0045_S496_L001_R1_001.fastq"
## [201] "rbcL-KLS0052_S497_L001_R1_001.fastq"
## [202] "rbcL-KLS0054_S498_L001_R1_001.fastq"
## [203] "rbcL-KLS0055_S499_L001_R1_001.fastq"
## [204] "rbcL-KLS0071_S500_L001_R1_001.fastq"
## [205] "rbcL-KLS0095_S501_L001_R1_001.fastq"
## [206] "rbcL-KLS0096_S502_L001_R1_001.fastq"
## [207] "rbcL-KLS0105_S503_L001_R1_001.fastq"
## [208] "rbcL-KLS0106_S504_L001_R1_001.fastq"
## [209] "rbcL-KLS0119_S505_L001_R1_001.fastq"
## [210] "rbcL-KLS0134_S506_L001_R1_001.fastq"
## [211] "rbcL-KLS0135_S507_L001_R1_001.fastq"
## [212] "rbcL-KLS0136_S508_L001_R1_001.fastq"
## [213] "rbcL-KLS0137_S509_L001_R1_001.fastq"
## [214] "rbcL-KLS0138_S510_L001_R1_001.fastq"
## [215] "rbcL-KLS0139_S511_L001_R1_001.fastq"
## [216] "rbcL-KLS0150_S512_L001_R1_001.fastq"
## [217] "rbcL-KLS0153_S513_L001_R1_001.fastq"
## [218] "rbcL-KLS0155_S514_L001_R1_001.fastq"
## [219] "rbcL-KLS0156_S515_L001_R1_001.fastq"
## [220] "rbcL-KLS0159_S516_L001_R1_001.fastq"
## [221] "rbcL-KLS0163_S517_L001_R1_001.fastq"
## [222] "rbcL-KLS0165_S518_L001_R1_001.fastq"
## [223] "rbcL-KLS0167_S519_L001_R1_001.fastq"
## [224] "rbcL-KLS0168_S520_L001_R1_001.fastq"
## [225] "rbcL-KLS0169_S521_L001_R1_001.fastq"
## [226] "rbcL-KLS0170_S522_L001_R1_001.fastq"
## [227] "rbcL-KLS0200_S523_L001_R1_001.fastq"
## [228] "rbcL-KLS0201_S524_L001_R1_001.fastq"
## [229] "rbcL-KLS0205_S525_L001_R1_001.fastq"
## [230] "rbcL-KLS0209_S526_L001_R1_001.fastq"
## [231] "rbcL-KLS0221_S527_L001_R1_001.fastq"
## [232] "rbcL-KLS0224_S528_L001_R1_001.fastq"
## [233] "rbcL-KLS0225_S529_L001_R1_001.fastq"
## [234] "rbcL-KLS0227_S530_L001_R1_001.fastq"
## [235] "rbcL-KLS0241_S531_L001_R1_001.fastq"
## [236] "rbcL-KLS0244_S532_L001_R1_001.fastq"
## [237] "rbcL-KLS0246_S533_L001_R1_001.fastq"
## [238] "rbcL-KLS0248_S534_L001_R1_001.fastq"
## [239] "rbcL-KLS0253_S535_L001_R1_001.fastq"
## [240] "rbcL-KLS0254_S536_L001_R1_001.fastq"
## [241] "rbcL-KLS0256_S493_L001_R1_001.fastq"
## [242] "rbcL-KLS0259_S537_L001_R1_001.fastq"
## [243] "rbcL-KLS0263_S538_L001_R1_001.fastq"
## [244] "rbcL-KLS0266_S539_L001_R1_001.fastq"
## [245] "rbcL-KLS0272_S540_L001_R1_001.fastq"
## [246] "rbcL-pcr-rbcL-neg-crtl-20240417_S541_L001_R1_001.fastq"
## [247] "rbcL-pcr-rbcL-neg-ctrl-20240409_S542_L001_R1_001.fastq"
## [248] "rbcL-pcr-rbcL-neg-ctrl-20240418A_S543_L001_R1_001.fastq"
## [249] "rbcL-pcr-rbcL-neg-ctrl-20240418B_S544_L001_R1_001.fastq"
## [250] "rbcL-pcr-rbcL-neg-ctrl-20240523_S545_L001_R1_001.fastq"
## [251] "rbcL-pcr-rbcL-neg-ctrl-20240531_S546_L001_R1_001.fastq"
## [252] "rbcL-pcr-rbcL-neg-ctrl-Greeshma-20240416_S547_L001_R1_001.fastq"
## [253] "rbcL-rbcL-pcr-neg-ctrl-20231021-20231119_S548_L001_R1_001.fastq"
## [254] "rbcL-rbcL-pcr-neg-ctrl-20231022-20231120_S549_L001_R1_001.fastq"
## [255] "rbcL-rbcL-pcr-neg-ctrl-20231023-20231121_S550_L001_R1_001.fastq"
## [256] "rbcL-SCA0009_S551_L001_R1_001.fastq"
## [257] "rbcL-SCA0010_S552_L001_R1_001.fastq"
## [258] "rbcL-SCA0013_S553_L001_R1_001.fastq"
strsplit(rownames(as.data.frame(out)), "_S")
## [[1]]
## [1] "rbcL-2020-6-16-H1" "293_L001_R1_001.fastq"
##
## [[2]]
## [1] "rbcL-2020-6-16-H5" "294_L001_R1_001.fastq"
##
## [[3]]
## [1] "rbcL-2020-6-16-H6" "295_L001_R1_001.fastq"
##
## [[4]]
## [1] "rbcL-2020-6-17-H2" "296_L001_R1_001.fastq"
##
## [[5]]
## [1] "rbcL-2020-6-17-H4" "297_L001_R1_001.fastq"
##
## [[6]]
## [1] "rbcL-2020-6-17-H8" "298_L001_R1_001.fastq"
##
## [[7]]
## [1] "rbcL-2020-6-18-H3" "299_L001_R1_001.fastq"
##
## [[8]]
## [1] "rbcL-2020-6-18-H7" "300_L001_R1_001.fastq"
##
## [[9]]
## [1] "rbcL-2020-6-18-H9" "301_L001_R1_001.fastq"
##
## [[10]]
## [1] "rbcL-2020-6-3-H1" "302_L001_R1_001.fastq"
##
## [[11]]
## [1] "rbcL-2020-6-3-H5" "303_L001_R1_001.fastq"
##
## [[12]]
## [1] "rbcL-2020-6-3-H6" "304_L001_R1_001.fastq"
##
## [[13]]
## [1] "rbcL-2020-6-30-H1" "305_L001_R1_001.fastq"
##
## [[14]]
## [1] "rbcL-2020-6-30-H5" "306_L001_R1_001.fastq"
##
## [[15]]
## [1] "rbcL-2020-6-30-H6" "307_L001_R1_001.fastq"
##
## [[16]]
## [1] "rbcL-2020-6-4-H2" "308_L001_R1_001.fastq"
##
## [[17]]
## [1] "rbcL-2020-6-4-H4" "309_L001_R1_001.fastq"
##
## [[18]]
## [1] "rbcL-2020-6-4-H8" "310_L001_R1_001.fastq"
##
## [[19]]
## [1] "rbcL-2020-6-5-H3" "311_L001_R1_001.fastq"
##
## [[20]]
## [1] "rbcL-2020-6-5-H7" "312_L001_R1_001.fastq"
##
## [[21]]
## [1] "rbcL-2020-6-5-H9" "313_L001_R1_001.fastq"
##
## [[22]]
## [1] "rbcL-2020-7-1-H2" "314_L001_R1_001.fastq"
##
## [[23]]
## [1] "rbcL-2020-7-1-H4" "315_L001_R1_001.fastq"
##
## [[24]]
## [1] "rbcL-2020-7-1-H8" "316_L001_R1_001.fastq"
##
## [[25]]
## [1] "rbcL-2020-7-14-H1" "317_L001_R1_001.fastq"
##
## [[26]]
## [1] "rbcL-2020-7-14-H5" "318_L001_R1_001.fastq"
##
## [[27]]
## [1] "rbcL-2020-7-14-H6" "319_L001_R1_001.fastq"
##
## [[28]]
## [1] "rbcL-2020-7-15-H4" "321_L001_R1_001.fastq"
##
## [[29]]
## [1] "rbcL-2020-7-15-H8" "322_L001_R1_001.fastq"
##
## [[30]]
## [1] "rbcL-2020-7-16-H3" "323_L001_R1_001.fastq"
##
## [[31]]
## [1] "rbcL-2020-7-16-H7" "324_L001_R1_001.fastq"
##
## [[32]]
## [1] "rbcL-2020-7-16-H9" "325_L001_R1_001.fastq"
##
## [[33]]
## [1] "rbcL-2020-7-2-H3" "326_L001_R1_001.fastq"
##
## [[34]]
## [1] "rbcL-2020-7-2-H7" "327_L001_R1_001.fastq"
##
## [[35]]
## [1] "rbcL-2020-7-2-H9" "328_L001_R1_001.fastq"
##
## [[36]]
## [1] "rbcL-2021-6-13-H1" "329_L001_R1_001.fastq"
##
## [[37]]
## [1] "rbcL-2021-6-13-H3" "330_L001_R1_001.fastq"
##
## [[38]]
## [1] "rbcL-2021-6-14-H11" "331_L001_R1_001.fastq"
##
## [[39]]
## [1] "rbcL-2021-6-14-H6" "332_L001_R1_001.fastq"
##
## [[40]]
## [1] "rbcL-2021-6-14-H7" "333_L001_R1_001.fastq"
##
## [[41]]
## [1] "rbcL-2021-6-15-H8" "334_L001_R1_001.fastq"
##
## [[42]]
## [1] "rbcL-2021-6-21-H10" "335_L001_R1_001.fastq"
##
## [[43]]
## [1] "rbcL-2021-6-21-H12" "336_L001_R1_001.fastq"
##
## [[44]]
## [1] "rbcL-2021-6-21-H9" "337_L001_R1_001.fastq"
##
## [[45]]
## [1] "rbcL-2021-6-27-H21" "338_L001_R1_001.fastq"
##
## [[46]]
## [1] "rbcL-2021-6-27-H22" "339_L001_R1_001.fastq"
##
## [[47]]
## [1] "rbcL-2021-6-27-H27" "340_L001_R1_001.fastq"
##
## [[48]]
## [1] "rbcL-2021-6-28-H25" "341_L001_R1_001.fastq"
##
## [[49]]
## [1] "rbcL-2021-6-28-H26" "342_L001_R1_001.fastq"
##
## [[50]]
## [1] "rbcL-2021-6-28-H28" "343_L001_R1_001.fastq"
##
## [[51]]
## [1] "rbcL-2021-6-29-H17" "344_L001_R1_001.fastq"
##
## [[52]]
## [1] "rbcL-2021-6-29-H23" "345_L001_R1_001.fastq"
##
## [[53]]
## [1] "rbcL-2021-6-29-H24" "346_L001_R1_001.fastq"
##
## [[54]]
## [1] "rbcL-2021-6-4-H21" "347_L001_R1_001.fastq"
##
## [[55]]
## [1] "rbcL-2021-6-4-H22" "348_L001_R1_001.fastq"
##
## [[56]]
## [1] "rbcL-2021-6-4-H27" "349_L001_R1_001.fastq"
##
## [[57]]
## [1] "rbcL-2021-6-5-H18" "350_L001_R1_001.fastq"
##
## [[58]]
## [1] "rbcL-2021-6-5-H25" "351_L001_R1_001.fastq"
##
## [[59]]
## [1] "rbcL-2021-6-5-H26" "352_L001_R1_001.fastq"
##
## [[60]]
## [1] "rbcL-2021-6-6-H17" "353_L001_R1_001.fastq"
##
## [[61]]
## [1] "rbcL-2021-6-6-H24" "354_L001_R1_001.fastq"
##
## [[62]]
## [1] "rbcL-2021-6-7-H23" "355_L001_R1_001.fastq"
##
## [[63]]
## [1] "rbcL-2021-7-14-H10" "356_L001_R1_001.fastq"
##
## [[64]]
## [1] "rbcL-2021-7-14-H12" "357_L001_R1_001.fastq"
##
## [[65]]
## [1] "rbcL-2021-7-20-H27" "358_L001_R1_001.fastq"
##
## [[66]]
## [1] "rbcL-2021-7-21-H25" "359_L001_R1_001.fastq"
##
## [[67]]
## [1] "rbcL-2021-7-21-H26" "360_L001_R1_001.fastq"
##
## [[68]]
## [1] "rbcL-2021-7-6-H11" "362_L001_R1_001.fastq"
##
## [[69]]
## [1] "rbcL-2021-7-6-H6" "364_L001_R1_001.fastq"
##
## [[70]]
## [1] "rbcL-2021-7-7-H4" "365_L001_R1_001.fastq"
##
## [[71]]
## [1] "rbcL-2021-7-7-H8" "366_L001_R1_001.fastq"
##
## [[72]]
## [1] "rbcL-2021-7-8-H3" "367_L001_R1_001.fastq"
##
## [[73]]
## [1] "rbcL-2023-6-12-H3" "368_L001_R1_001.fastq"
##
## [[74]]
## [1] "rbcL-2023-6-12-H5" "369_L001_R1_001.fastq"
##
## [[75]]
## [1] "rbcL-2023-6-12-H7" "370_L001_R1_001.fastq"
##
## [[76]]
## [1] "rbcL-2023-6-13-H6" "371_L001_R1_001.fastq"
##
## [[77]]
## [1] "rbcL-2023-6-13-H8" "372_L001_R1_001.fastq"
##
## [[78]]
## [1] "rbcL-2023-6-13-H9" "373_L001_R1_001.fastq"
##
## [[79]]
## [1] "rbcL-2023-6-14-H3" "374_L001_R1_001.fastq"
##
## [[80]]
## [1] "rbcL-2023-6-14-H7" "375_L001_R1_001.fastq"
##
## [[81]]
## [1] "rbcL-2023-6-14-H9" "376_L001_R1_001.fastq"
##
## [[82]]
## [1] "rbcL-2023-6-16-H5" "377_L001_R1_001.fastq"
##
## [[83]]
## [1] "rbcL-2023-6-24-H6" "378_L001_R1_001.fastq"
##
## [[84]]
## [1] "rbcL-2023-6-24-H8" "379_L001_R1_001.fastq"
##
## [[85]]
## [1] "rbcL-2023-6-25-H2" "380_L001_R1_001.fastq"
##
## [[86]]
## [1] "rbcL-2023-6-25-H4" "381_L001_R1_001.fastq"
##
## [[87]]
## [1] "rbcL-2023-6-26-H1" "382_L001_R1_001.fastq"
##
## [[88]]
## [1] "rbcL-2023-6-26-H7" "383_L001_R1_001.fastq"
##
## [[89]]
## [1] "rbcL-2023-6-27-H3" "384_L001_R1_001.fastq"
##
## [[90]]
## [1] "rbcL-2023-6-27-H5" "385_L001_R1_001.fastq"
##
## [[91]]
## [1] "rbcL-2023-6-8-H1" "386_L001_R1_001.fastq"
##
## [[92]]
## [1] "rbcL-2023-6-8-H2" "387_L001_R1_001.fastq"
##
## [[93]]
## [1] "rbcL-2023-6-8-H4" "388_L001_R1_001.fastq"
##
## [[94]]
## [1] "rbcL-2023-6-9-H2" "389_L001_R1_001.fastq"
##
## [[95]]
## [1] "rbcL-2023-6-9-H4" "390_L001_R1_001.fastq"
##
## [[96]]
## [1] "rbcL-2023-7-15-H6" "391_L001_R1_001.fastq"
##
## [[97]]
## [1] "rbcL-2023-7-16-H4" "392_L001_R1_001.fastq"
##
## [[98]]
## [1] "rbcL-2023-7-17-H1" "393_L001_R1_001.fastq"
##
## [[99]]
## [1] "rbcL-2023-7-18-H3" "394_L001_R1_001.fastq"
##
## [[100]]
## [1] "rbcL-2023-7-18-H7" "395_L001_R1_001.fastq"
##
## [[101]]
## [1] "rbcL-2023-7-29-H5" "396_L001_R1_001.fastq"
##
## [[102]]
## [1] "rbcL-2023-7-29-H7" "397_L001_R1_001.fastq"
##
## [[103]]
## [1] "rbcL-2023-7-30-H8" "398_L001_R1_001.fastq"
##
## [[104]]
## [1] "rbcL-2023-7-30-H9" "399_L001_R1_001.fastq"
##
## [[105]]
## [1] "rbcL-2023-7-5-H1" "400_L001_R1_001.fastq"
##
## [[106]]
## [1] "rbcL-2023-7-5-H2" "401_L001_R1_001.fastq"
##
## [[107]]
## [1] "rbcL-2023-7-5-H4" "402_L001_R1_001.fastq"
##
## [[108]]
## [1] "rbcL-2023-7-6-H6" "403_L001_R1_001.fastq"
##
## [[109]]
## [1] "rbcL-2023-7-6-H8" "404_L001_R1_001.fastq"
##
## [[110]]
## [1] "rbcL-2023-7-6-H9" "405_L001_R1_001.fastq"
##
## [[111]]
## [1] "rbcL-2023-7-8-H3" "406_L001_R1_001.fastq"
##
## [[112]]
## [1] "rbcL-2023-7-8-H5" "407_L001_R1_001.fastq"
##
## [[113]]
## [1] "rbcL-2023-7-8-H7" "408_L001_R1_001.fastq"
##
## [[114]]
## [1] "rbcL-2023-8-4-H2" "409_L001_R1_001.fastq"
##
## [[115]]
## [1] "rbcL-2023-8-4-H5" "410_L001_R1_001.fastq"
##
## [[116]]
## [1] "rbcL-2023-8-4-H6" "411_L001_R1_001.fastq"
##
## [[117]]
## [1] "rbcL-2023-8-4-H7" "412_L001_R1_001.fastq"
##
## [[118]]
## [1] "rbcL-2023-8-4-H8" "413_L001_R1_001.fastq"
##
## [[119]]
## [1] "rbcL-2023-8-4-H9" "414_L001_R1_001.fastq"
##
## [[120]]
## [1] "rbcL-Ba001" "415_L001_R1_001.fastq"
##
## [[121]]
## [1] "rbcL-Ba002" "416_L001_R1_001.fastq"
##
## [[122]]
## [1] "rbcL-Ba003" "417_L001_R1_001.fastq"
##
## [[123]]
## [1] "rbcL-Bb001" "418_L001_R1_001.fastq"
##
## [[124]]
## [1] "rbcL-Bb002" "419_L001_R1_001.fastq"
##
## [[125]]
## [1] "rbcL-Bb003" "420_L001_R1_001.fastq"
##
## [[126]]
## [1] "rbcL-Bb004" "421_L001_R1_001.fastq"
##
## [[127]]
## [1] "rbcL-Bb005" "422_L001_R1_001.fastq"
##
## [[128]]
## [1] "rbcL-Bb007" "423_L001_R1_001.fastq"
##
## [[129]]
## [1] "rbcL-Bb008" "424_L001_R1_001.fastq"
##
## [[130]]
## [1] "rbcL-Bb009" "425_L001_R1_001.fastq"
##
## [[131]]
## [1] "rbcL-Bb010" "426_L001_R1_001.fastq"
##
## [[132]]
## [1] "rbcL-Bb011" "427_L001_R1_001.fastq"
##
## [[133]]
## [1] "rbcL-Bb012" "428_L001_R1_001.fastq"
##
## [[134]]
## [1] "rbcL-Bb013" "429_L001_R1_001.fastq"
##
## [[135]]
## [1] "rbcL-Bb014" "430_L001_R1_001.fastq"
##
## [[136]]
## [1] "rbcL-Bb015" "431_L001_R1_001.fastq"
##
## [[137]]
## [1] "rbcL-Bb016" "432_L001_R1_001.fastq"
##
## [[138]]
## [1] "rbcL-Bb017" "433_L001_R1_001.fastq"
##
## [[139]]
## [1] "rbcL-Bb018" "434_L001_R1_001.fastq"
##
## [[140]]
## [1] "rbcL-Bb019" "435_L001_R1_001.fastq"
##
## [[141]]
## [1] "rbcL-Bb020" "436_L001_R1_001.fastq"
##
## [[142]]
## [1] "rbcL-Bb021" "437_L001_R1_001.fastq"
##
## [[143]]
## [1] "rbcL-Bb022" "438_L001_R1_001.fastq"
##
## [[144]]
## [1] "rbcL-Bb023" "439_L001_R1_001.fastq"
##
## [[145]]
## [1] "rbcL-Bb024" "440_L001_R1_001.fastq"
##
## [[146]]
## [1] "rbcL-Bb025" "441_L001_R1_001.fastq"
##
## [[147]]
## [1] "rbcL-Bf001" "442_L001_R1_001.fastq"
##
## [[148]]
## [1] "rbcL-Bf002" "443_L001_R1_001.fastq"
##
## [[149]]
## [1] "rbcL-Bf003" "444_L001_R1_001.fastq"
##
## [[150]]
## [1] "rbcL-Bf004" "445_L001_R1_001.fastq"
##
## [[151]]
## [1] "rbcL-Bg001" "446_L001_R1_001.fastq"
##
## [[152]]
## [1] "rbcL-Bg002" "447_L001_R1_001.fastq"
##
## [[153]]
## [1] "rbcL-Bg003" "448_L001_R1_001.fastq"
##
## [[154]]
## [1] "rbcL-Bg004" "449_L001_R1_001.fastq"
##
## [[155]]
## [1] "rbcL-Bg005" "450_L001_R1_001.fastq"
##
## [[156]]
## [1] "rbcL-Bg006" "451_L001_R1_001.fastq"
##
## [[157]]
## [1] "rbcL-Bg007" "452_L001_R1_001.fastq"
##
## [[158]]
## [1] "rbcL-Bg008" "453_L001_R1_001.fastq"
##
## [[159]]
## [1] "rbcL-Bg009" "454_L001_R1_001.fastq"
##
## [[160]]
## [1] "rbcL-Bg010" "455_L001_R1_001.fastq"
##
## [[161]]
## [1] "rbcL-Bg011" "456_L001_R1_001.fastq"
##
## [[162]]
## [1] "rbcL-Bg012" "457_L001_R1_001.fastq"
##
## [[163]]
## [1] "rbcL-Bg013" "458_L001_R1_001.fastq"
##
## [[164]]
## [1] "rbcL-Bg014" "459_L001_R1_001.fastq"
##
## [[165]]
## [1] "rbcL-Bg015" "460_L001_R1_001.fastq"
##
## [[166]]
## [1] "rbcL-Bg016" "461_L001_R1_001.fastq"
##
## [[167]]
## [1] "rbcL-Bg017" "462_L001_R1_001.fastq"
##
## [[168]]
## [1] "rbcL-Bg018" "463_L001_R1_001.fastq"
##
## [[169]]
## [1] "rbcL-Bg019" "464_L001_R1_001.fastq"
##
## [[170]]
## [1] "rbcL-Bi001" "465_L001_R1_001.fastq"
##
## [[171]]
## [1] "rbcL-Bi002" "466_L001_R1_001.fastq"
##
## [[172]]
## [1] "rbcL-Bi003" "467_L001_R1_001.fastq"
##
## [[173]]
## [1] "rbcL-Bi004" "468_L001_R1_001.fastq"
##
## [[174]]
## [1] "rbcL-Bi005" "469_L001_R1_001.fastq"
##
## [[175]]
## [1] "rbcL-Bi006" "470_L001_R1_001.fastq"
##
## [[176]]
## [1] "rbcL-Bi007" "471_L001_R1_001.fastq"
##
## [[177]]
## [1] "rbcL-CKC0001" "472_L001_R1_001.fastq"
##
## [[178]]
## [1] "rbcL-ESE0004" "473_L001_R1_001.fastq"
##
## [[179]]
## [1] "rbcL-ext-neg-ctrl-20230909" "474_L001_R1_001.fastq"
##
## [[180]]
## [1] "rbcL-ext-neg-ctrl-20230923" "475_L001_R1_001.fastq"
##
## [[181]]
## [1] "rbcL-ext-neg-ctrl-20230924" "476_L001_R1_001.fastq"
##
## [[182]]
## [1] "rbcL-ext-neg-ctrl-20231007" "477_L001_R1_001.fastq"
##
## [[183]]
## [1] "rbcL-ext-neg-ctrl-20231008" "478_L001_R1_001.fastq"
##
## [[184]]
## [1] "rbcL-ext-neg-ctrl-20231009" "479_L001_R1_001.fastq"
##
## [[185]]
## [1] "rbcL-ext-neg-ctrl-2024220A" "480_L001_R1_001.fastq"
##
## [[186]]
## [1] "rbcL-ext-neg-ctrl-2024220B" "481_L001_R1_001.fastq"
##
## [[187]]
## [1] "rbcL-ext-neg-ctrl-2024221A" "482_L001_R1_001.fastq"
##
## [[188]]
## [1] "rbcL-ext-neg-ctrl-2024221B" "483_L001_R1_001.fastq"
##
## [[189]]
## [1] "rbcL-ext-neg-ctrl-2024222A" "484_L001_R1_001.fastq"
##
## [[190]]
## [1] "rbcL-ext-neg-ctrl-2024222B" "485_L001_R1_001.fastq"
##
## [[191]]
## [1] "rbcL-ext-neg-ctrl-2024312A" "486_L001_R1_001.fastq"
##
## [[192]]
## [1] "rbcL-ext-neg-ctrl-2024312B" "487_L001_R1_001.fastq"
##
## [[193]]
## [1] "rbcL-ext-neg-ctrl-2024314A" "488_L001_R1_001.fastq"
##
## [[194]]
## [1] "rbcL-ext-neg-ctrl-2024314B" "489_L001_R1_001.fastq"
##
## [[195]]
## [1] "rbcL-ext-neg-ctrl-2024319" "490_L001_R1_001.fastq"
##
## [[196]]
## [1] "rbcL-ext-neg-ctrl-2024320" "491_L001_R1_001.fastq"
##
## [[197]]
## [1] "rbcL-KLS0007" "492_L001_R1_001.fastq"
##
## [[198]]
## [1] "rbcL-KLS0027" "494_L001_R1_001.fastq"
##
## [[199]]
## [1] "rbcL-KLS0044" "495_L001_R1_001.fastq"
##
## [[200]]
## [1] "rbcL-KLS0045" "496_L001_R1_001.fastq"
##
## [[201]]
## [1] "rbcL-KLS0052" "497_L001_R1_001.fastq"
##
## [[202]]
## [1] "rbcL-KLS0054" "498_L001_R1_001.fastq"
##
## [[203]]
## [1] "rbcL-KLS0055" "499_L001_R1_001.fastq"
##
## [[204]]
## [1] "rbcL-KLS0071" "500_L001_R1_001.fastq"
##
## [[205]]
## [1] "rbcL-KLS0095" "501_L001_R1_001.fastq"
##
## [[206]]
## [1] "rbcL-KLS0096" "502_L001_R1_001.fastq"
##
## [[207]]
## [1] "rbcL-KLS0105" "503_L001_R1_001.fastq"
##
## [[208]]
## [1] "rbcL-KLS0106" "504_L001_R1_001.fastq"
##
## [[209]]
## [1] "rbcL-KLS0119" "505_L001_R1_001.fastq"
##
## [[210]]
## [1] "rbcL-KLS0134" "506_L001_R1_001.fastq"
##
## [[211]]
## [1] "rbcL-KLS0135" "507_L001_R1_001.fastq"
##
## [[212]]
## [1] "rbcL-KLS0136" "508_L001_R1_001.fastq"
##
## [[213]]
## [1] "rbcL-KLS0137" "509_L001_R1_001.fastq"
##
## [[214]]
## [1] "rbcL-KLS0138" "510_L001_R1_001.fastq"
##
## [[215]]
## [1] "rbcL-KLS0139" "511_L001_R1_001.fastq"
##
## [[216]]
## [1] "rbcL-KLS0150" "512_L001_R1_001.fastq"
##
## [[217]]
## [1] "rbcL-KLS0153" "513_L001_R1_001.fastq"
##
## [[218]]
## [1] "rbcL-KLS0155" "514_L001_R1_001.fastq"
##
## [[219]]
## [1] "rbcL-KLS0156" "515_L001_R1_001.fastq"
##
## [[220]]
## [1] "rbcL-KLS0159" "516_L001_R1_001.fastq"
##
## [[221]]
## [1] "rbcL-KLS0163" "517_L001_R1_001.fastq"
##
## [[222]]
## [1] "rbcL-KLS0165" "518_L001_R1_001.fastq"
##
## [[223]]
## [1] "rbcL-KLS0167" "519_L001_R1_001.fastq"
##
## [[224]]
## [1] "rbcL-KLS0168" "520_L001_R1_001.fastq"
##
## [[225]]
## [1] "rbcL-KLS0169" "521_L001_R1_001.fastq"
##
## [[226]]
## [1] "rbcL-KLS0170" "522_L001_R1_001.fastq"
##
## [[227]]
## [1] "rbcL-KLS0200" "523_L001_R1_001.fastq"
##
## [[228]]
## [1] "rbcL-KLS0201" "524_L001_R1_001.fastq"
##
## [[229]]
## [1] "rbcL-KLS0205" "525_L001_R1_001.fastq"
##
## [[230]]
## [1] "rbcL-KLS0209" "526_L001_R1_001.fastq"
##
## [[231]]
## [1] "rbcL-KLS0221" "527_L001_R1_001.fastq"
##
## [[232]]
## [1] "rbcL-KLS0224" "528_L001_R1_001.fastq"
##
## [[233]]
## [1] "rbcL-KLS0225" "529_L001_R1_001.fastq"
##
## [[234]]
## [1] "rbcL-KLS0227" "530_L001_R1_001.fastq"
##
## [[235]]
## [1] "rbcL-KLS0241" "531_L001_R1_001.fastq"
##
## [[236]]
## [1] "rbcL-KLS0244" "532_L001_R1_001.fastq"
##
## [[237]]
## [1] "rbcL-KLS0246" "533_L001_R1_001.fastq"
##
## [[238]]
## [1] "rbcL-KLS0248" "534_L001_R1_001.fastq"
##
## [[239]]
## [1] "rbcL-KLS0253" "535_L001_R1_001.fastq"
##
## [[240]]
## [1] "rbcL-KLS0254" "536_L001_R1_001.fastq"
##
## [[241]]
## [1] "rbcL-KLS0256" "493_L001_R1_001.fastq"
##
## [[242]]
## [1] "rbcL-KLS0259" "537_L001_R1_001.fastq"
##
## [[243]]
## [1] "rbcL-KLS0263" "538_L001_R1_001.fastq"
##
## [[244]]
## [1] "rbcL-KLS0266" "539_L001_R1_001.fastq"
##
## [[245]]
## [1] "rbcL-KLS0272" "540_L001_R1_001.fastq"
##
## [[246]]
## [1] "rbcL-pcr-rbcL-neg-crtl-20240417" "541_L001_R1_001.fastq"
##
## [[247]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240409" "542_L001_R1_001.fastq"
##
## [[248]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418A" "543_L001_R1_001.fastq"
##
## [[249]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418B" "544_L001_R1_001.fastq"
##
## [[250]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240523" "545_L001_R1_001.fastq"
##
## [[251]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240531" "546_L001_R1_001.fastq"
##
## [[252]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-Greeshma-20240416"
## [2] "547_L001_R1_001.fastq"
##
## [[253]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231021-20231119"
## [2] "548_L001_R1_001.fastq"
##
## [[254]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231022-20231120"
## [2] "549_L001_R1_001.fastq"
##
## [[255]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231023-20231121"
## [2] "550_L001_R1_001.fastq"
##
## [[256]]
## [1] "rbcL-SCA0009" "551_L001_R1_001.fastq"
##
## [[257]]
## [1] "rbcL-SCA0010" "552_L001_R1_001.fastq"
##
## [[258]]
## [1] "rbcL-SCA0013" "553_L001_R1_001.fastq"
# Keep only the piece before the first "_S" of every row name of `out`,
# i.e. the sample label without the "<n>_L001_R1_001.fastq" remainder.
# The result is a list with one length-1 character vector per row.
out |>
  as.data.frame() |>
  rownames() |>
  strsplit("_S") |>
  lapply(`[[`, 1)
## [[1]]
## [1] "rbcL-2020-6-16-H1"
##
## [[2]]
## [1] "rbcL-2020-6-16-H5"
##
## [[3]]
## [1] "rbcL-2020-6-16-H6"
##
## [[4]]
## [1] "rbcL-2020-6-17-H2"
##
## [[5]]
## [1] "rbcL-2020-6-17-H4"
##
## [[6]]
## [1] "rbcL-2020-6-17-H8"
##
## [[7]]
## [1] "rbcL-2020-6-18-H3"
##
## [[8]]
## [1] "rbcL-2020-6-18-H7"
##
## [[9]]
## [1] "rbcL-2020-6-18-H9"
##
## [[10]]
## [1] "rbcL-2020-6-3-H1"
##
## [[11]]
## [1] "rbcL-2020-6-3-H5"
##
## [[12]]
## [1] "rbcL-2020-6-3-H6"
##
## [[13]]
## [1] "rbcL-2020-6-30-H1"
##
## [[14]]
## [1] "rbcL-2020-6-30-H5"
##
## [[15]]
## [1] "rbcL-2020-6-30-H6"
##
## [[16]]
## [1] "rbcL-2020-6-4-H2"
##
## [[17]]
## [1] "rbcL-2020-6-4-H4"
##
## [[18]]
## [1] "rbcL-2020-6-4-H8"
##
## [[19]]
## [1] "rbcL-2020-6-5-H3"
##
## [[20]]
## [1] "rbcL-2020-6-5-H7"
##
## [[21]]
## [1] "rbcL-2020-6-5-H9"
##
## [[22]]
## [1] "rbcL-2020-7-1-H2"
##
## [[23]]
## [1] "rbcL-2020-7-1-H4"
##
## [[24]]
## [1] "rbcL-2020-7-1-H8"
##
## [[25]]
## [1] "rbcL-2020-7-14-H1"
##
## [[26]]
## [1] "rbcL-2020-7-14-H5"
##
## [[27]]
## [1] "rbcL-2020-7-14-H6"
##
## [[28]]
## [1] "rbcL-2020-7-15-H4"
##
## [[29]]
## [1] "rbcL-2020-7-15-H8"
##
## [[30]]
## [1] "rbcL-2020-7-16-H3"
##
## [[31]]
## [1] "rbcL-2020-7-16-H7"
##
## [[32]]
## [1] "rbcL-2020-7-16-H9"
##
## [[33]]
## [1] "rbcL-2020-7-2-H3"
##
## [[34]]
## [1] "rbcL-2020-7-2-H7"
##
## [[35]]
## [1] "rbcL-2020-7-2-H9"
##
## [[36]]
## [1] "rbcL-2021-6-13-H1"
##
## [[37]]
## [1] "rbcL-2021-6-13-H3"
##
## [[38]]
## [1] "rbcL-2021-6-14-H11"
##
## [[39]]
## [1] "rbcL-2021-6-14-H6"
##
## [[40]]
## [1] "rbcL-2021-6-14-H7"
##
## [[41]]
## [1] "rbcL-2021-6-15-H8"
##
## [[42]]
## [1] "rbcL-2021-6-21-H10"
##
## [[43]]
## [1] "rbcL-2021-6-21-H12"
##
## [[44]]
## [1] "rbcL-2021-6-21-H9"
##
## [[45]]
## [1] "rbcL-2021-6-27-H21"
##
## [[46]]
## [1] "rbcL-2021-6-27-H22"
##
## [[47]]
## [1] "rbcL-2021-6-27-H27"
##
## [[48]]
## [1] "rbcL-2021-6-28-H25"
##
## [[49]]
## [1] "rbcL-2021-6-28-H26"
##
## [[50]]
## [1] "rbcL-2021-6-28-H28"
##
## [[51]]
## [1] "rbcL-2021-6-29-H17"
##
## [[52]]
## [1] "rbcL-2021-6-29-H23"
##
## [[53]]
## [1] "rbcL-2021-6-29-H24"
##
## [[54]]
## [1] "rbcL-2021-6-4-H21"
##
## [[55]]
## [1] "rbcL-2021-6-4-H22"
##
## [[56]]
## [1] "rbcL-2021-6-4-H27"
##
## [[57]]
## [1] "rbcL-2021-6-5-H18"
##
## [[58]]
## [1] "rbcL-2021-6-5-H25"
##
## [[59]]
## [1] "rbcL-2021-6-5-H26"
##
## [[60]]
## [1] "rbcL-2021-6-6-H17"
##
## [[61]]
## [1] "rbcL-2021-6-6-H24"
##
## [[62]]
## [1] "rbcL-2021-6-7-H23"
##
## [[63]]
## [1] "rbcL-2021-7-14-H10"
##
## [[64]]
## [1] "rbcL-2021-7-14-H12"
##
## [[65]]
## [1] "rbcL-2021-7-20-H27"
##
## [[66]]
## [1] "rbcL-2021-7-21-H25"
##
## [[67]]
## [1] "rbcL-2021-7-21-H26"
##
## [[68]]
## [1] "rbcL-2021-7-6-H11"
##
## [[69]]
## [1] "rbcL-2021-7-6-H6"
##
## [[70]]
## [1] "rbcL-2021-7-7-H4"
##
## [[71]]
## [1] "rbcL-2021-7-7-H8"
##
## [[72]]
## [1] "rbcL-2021-7-8-H3"
##
## [[73]]
## [1] "rbcL-2023-6-12-H3"
##
## [[74]]
## [1] "rbcL-2023-6-12-H5"
##
## [[75]]
## [1] "rbcL-2023-6-12-H7"
##
## [[76]]
## [1] "rbcL-2023-6-13-H6"
##
## [[77]]
## [1] "rbcL-2023-6-13-H8"
##
## [[78]]
## [1] "rbcL-2023-6-13-H9"
##
## [[79]]
## [1] "rbcL-2023-6-14-H3"
##
## [[80]]
## [1] "rbcL-2023-6-14-H7"
##
## [[81]]
## [1] "rbcL-2023-6-14-H9"
##
## [[82]]
## [1] "rbcL-2023-6-16-H5"
##
## [[83]]
## [1] "rbcL-2023-6-24-H6"
##
## [[84]]
## [1] "rbcL-2023-6-24-H8"
##
## [[85]]
## [1] "rbcL-2023-6-25-H2"
##
## [[86]]
## [1] "rbcL-2023-6-25-H4"
##
## [[87]]
## [1] "rbcL-2023-6-26-H1"
##
## [[88]]
## [1] "rbcL-2023-6-26-H7"
##
## [[89]]
## [1] "rbcL-2023-6-27-H3"
##
## [[90]]
## [1] "rbcL-2023-6-27-H5"
##
## [[91]]
## [1] "rbcL-2023-6-8-H1"
##
## [[92]]
## [1] "rbcL-2023-6-8-H2"
##
## [[93]]
## [1] "rbcL-2023-6-8-H4"
##
## [[94]]
## [1] "rbcL-2023-6-9-H2"
##
## [[95]]
## [1] "rbcL-2023-6-9-H4"
##
## [[96]]
## [1] "rbcL-2023-7-15-H6"
##
## [[97]]
## [1] "rbcL-2023-7-16-H4"
##
## [[98]]
## [1] "rbcL-2023-7-17-H1"
##
## [[99]]
## [1] "rbcL-2023-7-18-H3"
##
## [[100]]
## [1] "rbcL-2023-7-18-H7"
##
## [[101]]
## [1] "rbcL-2023-7-29-H5"
##
## [[102]]
## [1] "rbcL-2023-7-29-H7"
##
## [[103]]
## [1] "rbcL-2023-7-30-H8"
##
## [[104]]
## [1] "rbcL-2023-7-30-H9"
##
## [[105]]
## [1] "rbcL-2023-7-5-H1"
##
## [[106]]
## [1] "rbcL-2023-7-5-H2"
##
## [[107]]
## [1] "rbcL-2023-7-5-H4"
##
## [[108]]
## [1] "rbcL-2023-7-6-H6"
##
## [[109]]
## [1] "rbcL-2023-7-6-H8"
##
## [[110]]
## [1] "rbcL-2023-7-6-H9"
##
## [[111]]
## [1] "rbcL-2023-7-8-H3"
##
## [[112]]
## [1] "rbcL-2023-7-8-H5"
##
## [[113]]
## [1] "rbcL-2023-7-8-H7"
##
## [[114]]
## [1] "rbcL-2023-8-4-H2"
##
## [[115]]
## [1] "rbcL-2023-8-4-H5"
##
## [[116]]
## [1] "rbcL-2023-8-4-H6"
##
## [[117]]
## [1] "rbcL-2023-8-4-H7"
##
## [[118]]
## [1] "rbcL-2023-8-4-H8"
##
## [[119]]
## [1] "rbcL-2023-8-4-H9"
##
## [[120]]
## [1] "rbcL-Ba001"
##
## [[121]]
## [1] "rbcL-Ba002"
##
## [[122]]
## [1] "rbcL-Ba003"
##
## [[123]]
## [1] "rbcL-Bb001"
##
## [[124]]
## [1] "rbcL-Bb002"
##
## [[125]]
## [1] "rbcL-Bb003"
##
## [[126]]
## [1] "rbcL-Bb004"
##
## [[127]]
## [1] "rbcL-Bb005"
##
## [[128]]
## [1] "rbcL-Bb007"
##
## [[129]]
## [1] "rbcL-Bb008"
##
## [[130]]
## [1] "rbcL-Bb009"
##
## [[131]]
## [1] "rbcL-Bb010"
##
## [[132]]
## [1] "rbcL-Bb011"
##
## [[133]]
## [1] "rbcL-Bb012"
##
## [[134]]
## [1] "rbcL-Bb013"
##
## [[135]]
## [1] "rbcL-Bb014"
##
## [[136]]
## [1] "rbcL-Bb015"
##
## [[137]]
## [1] "rbcL-Bb016"
##
## [[138]]
## [1] "rbcL-Bb017"
##
## [[139]]
## [1] "rbcL-Bb018"
##
## [[140]]
## [1] "rbcL-Bb019"
##
## [[141]]
## [1] "rbcL-Bb020"
##
## [[142]]
## [1] "rbcL-Bb021"
##
## [[143]]
## [1] "rbcL-Bb022"
##
## [[144]]
## [1] "rbcL-Bb023"
##
## [[145]]
## [1] "rbcL-Bb024"
##
## [[146]]
## [1] "rbcL-Bb025"
##
## [[147]]
## [1] "rbcL-Bf001"
##
## [[148]]
## [1] "rbcL-Bf002"
##
## [[149]]
## [1] "rbcL-Bf003"
##
## [[150]]
## [1] "rbcL-Bf004"
##
## [[151]]
## [1] "rbcL-Bg001"
##
## [[152]]
## [1] "rbcL-Bg002"
##
## [[153]]
## [1] "rbcL-Bg003"
##
## [[154]]
## [1] "rbcL-Bg004"
##
## [[155]]
## [1] "rbcL-Bg005"
##
## [[156]]
## [1] "rbcL-Bg006"
##
## [[157]]
## [1] "rbcL-Bg007"
##
## [[158]]
## [1] "rbcL-Bg008"
##
## [[159]]
## [1] "rbcL-Bg009"
##
## [[160]]
## [1] "rbcL-Bg010"
##
## [[161]]
## [1] "rbcL-Bg011"
##
## [[162]]
## [1] "rbcL-Bg012"
##
## [[163]]
## [1] "rbcL-Bg013"
##
## [[164]]
## [1] "rbcL-Bg014"
##
## [[165]]
## [1] "rbcL-Bg015"
##
## [[166]]
## [1] "rbcL-Bg016"
##
## [[167]]
## [1] "rbcL-Bg017"
##
## [[168]]
## [1] "rbcL-Bg018"
##
## [[169]]
## [1] "rbcL-Bg019"
##
## [[170]]
## [1] "rbcL-Bi001"
##
## [[171]]
## [1] "rbcL-Bi002"
##
## [[172]]
## [1] "rbcL-Bi003"
##
## [[173]]
## [1] "rbcL-Bi004"
##
## [[174]]
## [1] "rbcL-Bi005"
##
## [[175]]
## [1] "rbcL-Bi006"
##
## [[176]]
## [1] "rbcL-Bi007"
##
## [[177]]
## [1] "rbcL-CKC0001"
##
## [[178]]
## [1] "rbcL-ESE0004"
##
## [[179]]
## [1] "rbcL-ext-neg-ctrl-20230909"
##
## [[180]]
## [1] "rbcL-ext-neg-ctrl-20230923"
##
## [[181]]
## [1] "rbcL-ext-neg-ctrl-20230924"
##
## [[182]]
## [1] "rbcL-ext-neg-ctrl-20231007"
##
## [[183]]
## [1] "rbcL-ext-neg-ctrl-20231008"
##
## [[184]]
## [1] "rbcL-ext-neg-ctrl-20231009"
##
## [[185]]
## [1] "rbcL-ext-neg-ctrl-2024220A"
##
## [[186]]
## [1] "rbcL-ext-neg-ctrl-2024220B"
##
## [[187]]
## [1] "rbcL-ext-neg-ctrl-2024221A"
##
## [[188]]
## [1] "rbcL-ext-neg-ctrl-2024221B"
##
## [[189]]
## [1] "rbcL-ext-neg-ctrl-2024222A"
##
## [[190]]
## [1] "rbcL-ext-neg-ctrl-2024222B"
##
## [[191]]
## [1] "rbcL-ext-neg-ctrl-2024312A"
##
## [[192]]
## [1] "rbcL-ext-neg-ctrl-2024312B"
##
## [[193]]
## [1] "rbcL-ext-neg-ctrl-2024314A"
##
## [[194]]
## [1] "rbcL-ext-neg-ctrl-2024314B"
##
## [[195]]
## [1] "rbcL-ext-neg-ctrl-2024319"
##
## [[196]]
## [1] "rbcL-ext-neg-ctrl-2024320"
##
## [[197]]
## [1] "rbcL-KLS0007"
##
## [[198]]
## [1] "rbcL-KLS0027"
##
## [[199]]
## [1] "rbcL-KLS0044"
##
## [[200]]
## [1] "rbcL-KLS0045"
##
## [[201]]
## [1] "rbcL-KLS0052"
##
## [[202]]
## [1] "rbcL-KLS0054"
##
## [[203]]
## [1] "rbcL-KLS0055"
##
## [[204]]
## [1] "rbcL-KLS0071"
##
## [[205]]
## [1] "rbcL-KLS0095"
##
## [[206]]
## [1] "rbcL-KLS0096"
##
## [[207]]
## [1] "rbcL-KLS0105"
##
## [[208]]
## [1] "rbcL-KLS0106"
##
## [[209]]
## [1] "rbcL-KLS0119"
##
## [[210]]
## [1] "rbcL-KLS0134"
##
## [[211]]
## [1] "rbcL-KLS0135"
##
## [[212]]
## [1] "rbcL-KLS0136"
##
## [[213]]
## [1] "rbcL-KLS0137"
##
## [[214]]
## [1] "rbcL-KLS0138"
##
## [[215]]
## [1] "rbcL-KLS0139"
##
## [[216]]
## [1] "rbcL-KLS0150"
##
## [[217]]
## [1] "rbcL-KLS0153"
##
## [[218]]
## [1] "rbcL-KLS0155"
##
## [[219]]
## [1] "rbcL-KLS0156"
##
## [[220]]
## [1] "rbcL-KLS0159"
##
## [[221]]
## [1] "rbcL-KLS0163"
##
## [[222]]
## [1] "rbcL-KLS0165"
##
## [[223]]
## [1] "rbcL-KLS0167"
##
## [[224]]
## [1] "rbcL-KLS0168"
##
## [[225]]
## [1] "rbcL-KLS0169"
##
## [[226]]
## [1] "rbcL-KLS0170"
##
## [[227]]
## [1] "rbcL-KLS0200"
##
## [[228]]
## [1] "rbcL-KLS0201"
##
## [[229]]
## [1] "rbcL-KLS0205"
##
## [[230]]
## [1] "rbcL-KLS0209"
##
## [[231]]
## [1] "rbcL-KLS0221"
##
## [[232]]
## [1] "rbcL-KLS0224"
##
## [[233]]
## [1] "rbcL-KLS0225"
##
## [[234]]
## [1] "rbcL-KLS0227"
##
## [[235]]
## [1] "rbcL-KLS0241"
##
## [[236]]
## [1] "rbcL-KLS0244"
##
## [[237]]
## [1] "rbcL-KLS0246"
##
## [[238]]
## [1] "rbcL-KLS0248"
##
## [[239]]
## [1] "rbcL-KLS0253"
##
## [[240]]
## [1] "rbcL-KLS0254"
##
## [[241]]
## [1] "rbcL-KLS0256"
##
## [[242]]
## [1] "rbcL-KLS0259"
##
## [[243]]
## [1] "rbcL-KLS0263"
##
## [[244]]
## [1] "rbcL-KLS0266"
##
## [[245]]
## [1] "rbcL-KLS0272"
##
## [[246]]
## [1] "rbcL-pcr-rbcL-neg-crtl-20240417"
##
## [[247]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240409"
##
## [[248]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418A"
##
## [[249]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418B"
##
## [[250]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240523"
##
## [[251]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240531"
##
## [[252]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-Greeshma-20240416"
##
## [[253]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231021-20231119"
##
## [[254]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231022-20231120"
##
## [[255]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231023-20231121"
##
## [[256]]
## [1] "rbcL-SCA0009"
##
## [[257]]
## [1] "rbcL-SCA0010"
##
## [[258]]
## [1] "rbcL-SCA0013"
# Break every sample label (the part of each row name of `out` before the
# first "_S") into its "-"-separated fields.
# vapply() is used instead of sapply() so the labels are always a character
# vector: sapply() returns list() for empty input, which makes the outer
# strsplit() fail with "non character argument".
strsplit(
  vapply(
    strsplit(rownames(as.data.frame(out)), "_S"),
    function(l) l[[1]],
    character(1)
  ),
  "-"
)
## [[1]]
## [1] "rbcL" "2020" "6" "16" "H1"
##
## [[2]]
## [1] "rbcL" "2020" "6" "16" "H5"
##
## [[3]]
## [1] "rbcL" "2020" "6" "16" "H6"
##
## [[4]]
## [1] "rbcL" "2020" "6" "17" "H2"
##
## [[5]]
## [1] "rbcL" "2020" "6" "17" "H4"
##
## [[6]]
## [1] "rbcL" "2020" "6" "17" "H8"
##
## [[7]]
## [1] "rbcL" "2020" "6" "18" "H3"
##
## [[8]]
## [1] "rbcL" "2020" "6" "18" "H7"
##
## [[9]]
## [1] "rbcL" "2020" "6" "18" "H9"
##
## [[10]]
## [1] "rbcL" "2020" "6" "3" "H1"
##
## [[11]]
## [1] "rbcL" "2020" "6" "3" "H5"
##
## [[12]]
## [1] "rbcL" "2020" "6" "3" "H6"
##
## [[13]]
## [1] "rbcL" "2020" "6" "30" "H1"
##
## [[14]]
## [1] "rbcL" "2020" "6" "30" "H5"
##
## [[15]]
## [1] "rbcL" "2020" "6" "30" "H6"
##
## [[16]]
## [1] "rbcL" "2020" "6" "4" "H2"
##
## [[17]]
## [1] "rbcL" "2020" "6" "4" "H4"
##
## [[18]]
## [1] "rbcL" "2020" "6" "4" "H8"
##
## [[19]]
## [1] "rbcL" "2020" "6" "5" "H3"
##
## [[20]]
## [1] "rbcL" "2020" "6" "5" "H7"
##
## [[21]]
## [1] "rbcL" "2020" "6" "5" "H9"
##
## [[22]]
## [1] "rbcL" "2020" "7" "1" "H2"
##
## [[23]]
## [1] "rbcL" "2020" "7" "1" "H4"
##
## [[24]]
## [1] "rbcL" "2020" "7" "1" "H8"
##
## [[25]]
## [1] "rbcL" "2020" "7" "14" "H1"
##
## [[26]]
## [1] "rbcL" "2020" "7" "14" "H5"
##
## [[27]]
## [1] "rbcL" "2020" "7" "14" "H6"
##
## [[28]]
## [1] "rbcL" "2020" "7" "15" "H4"
##
## [[29]]
## [1] "rbcL" "2020" "7" "15" "H8"
##
## [[30]]
## [1] "rbcL" "2020" "7" "16" "H3"
##
## [[31]]
## [1] "rbcL" "2020" "7" "16" "H7"
##
## [[32]]
## [1] "rbcL" "2020" "7" "16" "H9"
##
## [[33]]
## [1] "rbcL" "2020" "7" "2" "H3"
##
## [[34]]
## [1] "rbcL" "2020" "7" "2" "H7"
##
## [[35]]
## [1] "rbcL" "2020" "7" "2" "H9"
##
## [[36]]
## [1] "rbcL" "2021" "6" "13" "H1"
##
## [[37]]
## [1] "rbcL" "2021" "6" "13" "H3"
##
## [[38]]
## [1] "rbcL" "2021" "6" "14" "H11"
##
## [[39]]
## [1] "rbcL" "2021" "6" "14" "H6"
##
## [[40]]
## [1] "rbcL" "2021" "6" "14" "H7"
##
## [[41]]
## [1] "rbcL" "2021" "6" "15" "H8"
##
## [[42]]
## [1] "rbcL" "2021" "6" "21" "H10"
##
## [[43]]
## [1] "rbcL" "2021" "6" "21" "H12"
##
## [[44]]
## [1] "rbcL" "2021" "6" "21" "H9"
##
## [[45]]
## [1] "rbcL" "2021" "6" "27" "H21"
##
## [[46]]
## [1] "rbcL" "2021" "6" "27" "H22"
##
## [[47]]
## [1] "rbcL" "2021" "6" "27" "H27"
##
## [[48]]
## [1] "rbcL" "2021" "6" "28" "H25"
##
## [[49]]
## [1] "rbcL" "2021" "6" "28" "H26"
##
## [[50]]
## [1] "rbcL" "2021" "6" "28" "H28"
##
## [[51]]
## [1] "rbcL" "2021" "6" "29" "H17"
##
## [[52]]
## [1] "rbcL" "2021" "6" "29" "H23"
##
## [[53]]
## [1] "rbcL" "2021" "6" "29" "H24"
##
## [[54]]
## [1] "rbcL" "2021" "6" "4" "H21"
##
## [[55]]
## [1] "rbcL" "2021" "6" "4" "H22"
##
## [[56]]
## [1] "rbcL" "2021" "6" "4" "H27"
##
## [[57]]
## [1] "rbcL" "2021" "6" "5" "H18"
##
## [[58]]
## [1] "rbcL" "2021" "6" "5" "H25"
##
## [[59]]
## [1] "rbcL" "2021" "6" "5" "H26"
##
## [[60]]
## [1] "rbcL" "2021" "6" "6" "H17"
##
## [[61]]
## [1] "rbcL" "2021" "6" "6" "H24"
##
## [[62]]
## [1] "rbcL" "2021" "6" "7" "H23"
##
## [[63]]
## [1] "rbcL" "2021" "7" "14" "H10"
##
## [[64]]
## [1] "rbcL" "2021" "7" "14" "H12"
##
## [[65]]
## [1] "rbcL" "2021" "7" "20" "H27"
##
## [[66]]
## [1] "rbcL" "2021" "7" "21" "H25"
##
## [[67]]
## [1] "rbcL" "2021" "7" "21" "H26"
##
## [[68]]
## [1] "rbcL" "2021" "7" "6" "H11"
##
## [[69]]
## [1] "rbcL" "2021" "7" "6" "H6"
##
## [[70]]
## [1] "rbcL" "2021" "7" "7" "H4"
##
## [[71]]
## [1] "rbcL" "2021" "7" "7" "H8"
##
## [[72]]
## [1] "rbcL" "2021" "7" "8" "H3"
##
## [[73]]
## [1] "rbcL" "2023" "6" "12" "H3"
##
## [[74]]
## [1] "rbcL" "2023" "6" "12" "H5"
##
## [[75]]
## [1] "rbcL" "2023" "6" "12" "H7"
##
## [[76]]
## [1] "rbcL" "2023" "6" "13" "H6"
##
## [[77]]
## [1] "rbcL" "2023" "6" "13" "H8"
##
## [[78]]
## [1] "rbcL" "2023" "6" "13" "H9"
##
## [[79]]
## [1] "rbcL" "2023" "6" "14" "H3"
##
## [[80]]
## [1] "rbcL" "2023" "6" "14" "H7"
##
## [[81]]
## [1] "rbcL" "2023" "6" "14" "H9"
##
## [[82]]
## [1] "rbcL" "2023" "6" "16" "H5"
##
## [[83]]
## [1] "rbcL" "2023" "6" "24" "H6"
##
## [[84]]
## [1] "rbcL" "2023" "6" "24" "H8"
##
## [[85]]
## [1] "rbcL" "2023" "6" "25" "H2"
##
## [[86]]
## [1] "rbcL" "2023" "6" "25" "H4"
##
## [[87]]
## [1] "rbcL" "2023" "6" "26" "H1"
##
## [[88]]
## [1] "rbcL" "2023" "6" "26" "H7"
##
## [[89]]
## [1] "rbcL" "2023" "6" "27" "H3"
##
## [[90]]
## [1] "rbcL" "2023" "6" "27" "H5"
##
## [[91]]
## [1] "rbcL" "2023" "6" "8" "H1"
##
## [[92]]
## [1] "rbcL" "2023" "6" "8" "H2"
##
## [[93]]
## [1] "rbcL" "2023" "6" "8" "H4"
##
## [[94]]
## [1] "rbcL" "2023" "6" "9" "H2"
##
## [[95]]
## [1] "rbcL" "2023" "6" "9" "H4"
##
## [[96]]
## [1] "rbcL" "2023" "7" "15" "H6"
##
## [[97]]
## [1] "rbcL" "2023" "7" "16" "H4"
##
## [[98]]
## [1] "rbcL" "2023" "7" "17" "H1"
##
## [[99]]
## [1] "rbcL" "2023" "7" "18" "H3"
##
## [[100]]
## [1] "rbcL" "2023" "7" "18" "H7"
##
## [[101]]
## [1] "rbcL" "2023" "7" "29" "H5"
##
## [[102]]
## [1] "rbcL" "2023" "7" "29" "H7"
##
## [[103]]
## [1] "rbcL" "2023" "7" "30" "H8"
##
## [[104]]
## [1] "rbcL" "2023" "7" "30" "H9"
##
## [[105]]
## [1] "rbcL" "2023" "7" "5" "H1"
##
## [[106]]
## [1] "rbcL" "2023" "7" "5" "H2"
##
## [[107]]
## [1] "rbcL" "2023" "7" "5" "H4"
##
## [[108]]
## [1] "rbcL" "2023" "7" "6" "H6"
##
## [[109]]
## [1] "rbcL" "2023" "7" "6" "H8"
##
## [[110]]
## [1] "rbcL" "2023" "7" "6" "H9"
##
## [[111]]
## [1] "rbcL" "2023" "7" "8" "H3"
##
## [[112]]
## [1] "rbcL" "2023" "7" "8" "H5"
##
## [[113]]
## [1] "rbcL" "2023" "7" "8" "H7"
##
## [[114]]
## [1] "rbcL" "2023" "8" "4" "H2"
##
## [[115]]
## [1] "rbcL" "2023" "8" "4" "H5"
##
## [[116]]
## [1] "rbcL" "2023" "8" "4" "H6"
##
## [[117]]
## [1] "rbcL" "2023" "8" "4" "H7"
##
## [[118]]
## [1] "rbcL" "2023" "8" "4" "H8"
##
## [[119]]
## [1] "rbcL" "2023" "8" "4" "H9"
##
## [[120]]
## [1] "rbcL" "Ba001"
##
## [[121]]
## [1] "rbcL" "Ba002"
##
## [[122]]
## [1] "rbcL" "Ba003"
##
## [[123]]
## [1] "rbcL" "Bb001"
##
## [[124]]
## [1] "rbcL" "Bb002"
##
## [[125]]
## [1] "rbcL" "Bb003"
##
## [[126]]
## [1] "rbcL" "Bb004"
##
## [[127]]
## [1] "rbcL" "Bb005"
##
## [[128]]
## [1] "rbcL" "Bb007"
##
## [[129]]
## [1] "rbcL" "Bb008"
##
## [[130]]
## [1] "rbcL" "Bb009"
##
## [[131]]
## [1] "rbcL" "Bb010"
##
## [[132]]
## [1] "rbcL" "Bb011"
##
## [[133]]
## [1] "rbcL" "Bb012"
##
## [[134]]
## [1] "rbcL" "Bb013"
##
## [[135]]
## [1] "rbcL" "Bb014"
##
## [[136]]
## [1] "rbcL" "Bb015"
##
## [[137]]
## [1] "rbcL" "Bb016"
##
## [[138]]
## [1] "rbcL" "Bb017"
##
## [[139]]
## [1] "rbcL" "Bb018"
##
## [[140]]
## [1] "rbcL" "Bb019"
##
## [[141]]
## [1] "rbcL" "Bb020"
##
## [[142]]
## [1] "rbcL" "Bb021"
##
## [[143]]
## [1] "rbcL" "Bb022"
##
## [[144]]
## [1] "rbcL" "Bb023"
##
## [[145]]
## [1] "rbcL" "Bb024"
##
## [[146]]
## [1] "rbcL" "Bb025"
##
## [[147]]
## [1] "rbcL" "Bf001"
##
## [[148]]
## [1] "rbcL" "Bf002"
##
## [[149]]
## [1] "rbcL" "Bf003"
##
## [[150]]
## [1] "rbcL" "Bf004"
##
## [[151]]
## [1] "rbcL" "Bg001"
##
## [[152]]
## [1] "rbcL" "Bg002"
##
## [[153]]
## [1] "rbcL" "Bg003"
##
## [[154]]
## [1] "rbcL" "Bg004"
##
## [[155]]
## [1] "rbcL" "Bg005"
##
## [[156]]
## [1] "rbcL" "Bg006"
##
## [[157]]
## [1] "rbcL" "Bg007"
##
## [[158]]
## [1] "rbcL" "Bg008"
##
## [[159]]
## [1] "rbcL" "Bg009"
##
## [[160]]
## [1] "rbcL" "Bg010"
##
## [[161]]
## [1] "rbcL" "Bg011"
##
## [[162]]
## [1] "rbcL" "Bg012"
##
## [[163]]
## [1] "rbcL" "Bg013"
##
## [[164]]
## [1] "rbcL" "Bg014"
##
## [[165]]
## [1] "rbcL" "Bg015"
##
## [[166]]
## [1] "rbcL" "Bg016"
##
## [[167]]
## [1] "rbcL" "Bg017"
##
## [[168]]
## [1] "rbcL" "Bg018"
##
## [[169]]
## [1] "rbcL" "Bg019"
##
## [[170]]
## [1] "rbcL" "Bi001"
##
## [[171]]
## [1] "rbcL" "Bi002"
##
## [[172]]
## [1] "rbcL" "Bi003"
##
## [[173]]
## [1] "rbcL" "Bi004"
##
## [[174]]
## [1] "rbcL" "Bi005"
##
## [[175]]
## [1] "rbcL" "Bi006"
##
## [[176]]
## [1] "rbcL" "Bi007"
##
## [[177]]
## [1] "rbcL" "CKC0001"
##
## [[178]]
## [1] "rbcL" "ESE0004"
##
## [[179]]
## [1] "rbcL" "ext" "neg" "ctrl" "20230909"
##
## [[180]]
## [1] "rbcL" "ext" "neg" "ctrl" "20230923"
##
## [[181]]
## [1] "rbcL" "ext" "neg" "ctrl" "20230924"
##
## [[182]]
## [1] "rbcL" "ext" "neg" "ctrl" "20231007"
##
## [[183]]
## [1] "rbcL" "ext" "neg" "ctrl" "20231008"
##
## [[184]]
## [1] "rbcL" "ext" "neg" "ctrl" "20231009"
##
## [[185]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024220A"
##
## [[186]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024220B"
##
## [[187]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024221A"
##
## [[188]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024221B"
##
## [[189]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024222A"
##
## [[190]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024222B"
##
## [[191]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024312A"
##
## [[192]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024312B"
##
## [[193]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024314A"
##
## [[194]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024314B"
##
## [[195]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024319"
##
## [[196]]
## [1] "rbcL" "ext" "neg" "ctrl" "2024320"
##
## [[197]]
## [1] "rbcL" "KLS0007"
##
## [[198]]
## [1] "rbcL" "KLS0027"
##
## [[199]]
## [1] "rbcL" "KLS0044"
##
## [[200]]
## [1] "rbcL" "KLS0045"
##
## [[201]]
## [1] "rbcL" "KLS0052"
##
## [[202]]
## [1] "rbcL" "KLS0054"
##
## [[203]]
## [1] "rbcL" "KLS0055"
##
## [[204]]
## [1] "rbcL" "KLS0071"
##
## [[205]]
## [1] "rbcL" "KLS0095"
##
## [[206]]
## [1] "rbcL" "KLS0096"
##
## [[207]]
## [1] "rbcL" "KLS0105"
##
## [[208]]
## [1] "rbcL" "KLS0106"
##
## [[209]]
## [1] "rbcL" "KLS0119"
##
## [[210]]
## [1] "rbcL" "KLS0134"
##
## [[211]]
## [1] "rbcL" "KLS0135"
##
## [[212]]
## [1] "rbcL" "KLS0136"
##
## [[213]]
## [1] "rbcL" "KLS0137"
##
## [[214]]
## [1] "rbcL" "KLS0138"
##
## [[215]]
## [1] "rbcL" "KLS0139"
##
## [[216]]
## [1] "rbcL" "KLS0150"
##
## [[217]]
## [1] "rbcL" "KLS0153"
##
## [[218]]
## [1] "rbcL" "KLS0155"
##
## [[219]]
## [1] "rbcL" "KLS0156"
##
## [[220]]
## [1] "rbcL" "KLS0159"
##
## [[221]]
## [1] "rbcL" "KLS0163"
##
## [[222]]
## [1] "rbcL" "KLS0165"
##
## [[223]]
## [1] "rbcL" "KLS0167"
##
## [[224]]
## [1] "rbcL" "KLS0168"
##
## [[225]]
## [1] "rbcL" "KLS0169"
##
## [[226]]
## [1] "rbcL" "KLS0170"
##
## [[227]]
## [1] "rbcL" "KLS0200"
##
## [[228]]
## [1] "rbcL" "KLS0201"
##
## [[229]]
## [1] "rbcL" "KLS0205"
##
## [[230]]
## [1] "rbcL" "KLS0209"
##
## [[231]]
## [1] "rbcL" "KLS0221"
##
## [[232]]
## [1] "rbcL" "KLS0224"
##
## [[233]]
## [1] "rbcL" "KLS0225"
##
## [[234]]
## [1] "rbcL" "KLS0227"
##
## [[235]]
## [1] "rbcL" "KLS0241"
##
## [[236]]
## [1] "rbcL" "KLS0244"
##
## [[237]]
## [1] "rbcL" "KLS0246"
##
## [[238]]
## [1] "rbcL" "KLS0248"
##
## [[239]]
## [1] "rbcL" "KLS0253"
##
## [[240]]
## [1] "rbcL" "KLS0254"
##
## [[241]]
## [1] "rbcL" "KLS0256"
##
## [[242]]
## [1] "rbcL" "KLS0259"
##
## [[243]]
## [1] "rbcL" "KLS0263"
##
## [[244]]
## [1] "rbcL" "KLS0266"
##
## [[245]]
## [1] "rbcL" "KLS0272"
##
## [[246]]
## [1] "rbcL" "pcr" "rbcL" "neg" "crtl" "20240417"
##
## [[247]]
## [1] "rbcL" "pcr" "rbcL" "neg" "ctrl" "20240409"
##
## [[248]]
## [1] "rbcL" "pcr" "rbcL" "neg" "ctrl" "20240418A"
##
## [[249]]
## [1] "rbcL" "pcr" "rbcL" "neg" "ctrl" "20240418B"
##
## [[250]]
## [1] "rbcL" "pcr" "rbcL" "neg" "ctrl" "20240523"
##
## [[251]]
## [1] "rbcL" "pcr" "rbcL" "neg" "ctrl" "20240531"
##
## [[252]]
## [1] "rbcL" "pcr" "rbcL" "neg" "ctrl" "Greeshma" "20240416"
##
## [[253]]
## [1] "rbcL" "rbcL" "pcr" "neg" "ctrl" "20231021" "20231119"
##
## [[254]]
## [1] "rbcL" "rbcL" "pcr" "neg" "ctrl" "20231022" "20231120"
##
## [[255]]
## [1] "rbcL" "rbcL" "pcr" "neg" "ctrl" "20231023" "20231121"
##
## [[256]]
## [1] "rbcL" "SCA0009"
##
## [[257]]
## [1] "rbcL" "SCA0010"
##
## [[258]]
## [1] "rbcL" "SCA0013"
# Preview for the first sample only: keep the text before the first "_S" of each
# row name of `out`, split it on "-", and drop the leading field.
strsplit(
  sapply(strsplit(rownames(as.data.frame(out)), "_S"), `[[`, 1),
  "-"
)[[1]][-1]
## [1] "2020" "6" "16" "H1"
# Build short sample names from the row names of `out` (the fastq file names).
# Step 1: keep only the text before the first "_S" of each file name, then split
#         that text on "-" into its fields (one character vector per sample).
temp <- strsplit(
  vapply(strsplit(rownames(as.data.frame(out)), "_S"), function(l) l[[1]], character(1)),
  "-"
)
# Step 2: drop the first field of every sample (e.g. "rbcL") and rejoin the rest
#         with "_". vapply() always returns a character vector with one entry per
#         sample, and returns character(0) for empty input, where a
#         1:length(...) loop would iterate over c(1, 0) and fail.
sample.names <- vapply(
  temp,
  function(fields) paste(fields[-1], collapse = "_"),
  character(1)
)
# Sanity check: show the first and last few sample names, then confirm that
# there is exactly one sample name per row of the filterAndTrim output.
head(sample.names)
tail(sample.names)
length(sample.names) # number of sample names
length(rownames(out)) # number of samples output from filterAndTrim
## [1] "2020_6_16_H1" "2020_6_16_H5" "2020_6_16_H6" "2020_6_17_H2" "2020_6_17_H4"
## [6] "2020_6_17_H8"
## [1] "rbcL_pcr_neg_ctrl_20231021_20231119" "rbcL_pcr_neg_ctrl_20231022_20231120"
## [3] "rbcL_pcr_neg_ctrl_20231023_20231121" "SCA0009"
## [5] "SCA0010" "SCA0013"
## [1] 258
## [1] 258
# Label the rows of `out` with the short sample names instead of the file names.
rownames(out) <- sample.names
Not every sample made it through the filterAndTrim step
# Length of "filtFs," created in chunk above (258): one output path per
# cutadapt-trimmed forward file.
length(
  file.path(path.cut, "filtered", basename(cutFs))
)
## [1] 258
# Length of the forward files actually written to the "filtered" directory (246).
length(
  list.files(
    path = file.path(path.cut, "filtered"),
    pattern = "L001_R1_001.fastq",
    full.names = TRUE
  )
)
## [1] 246
# Rebuild the filtered file paths from what is actually on disk, since not all
# samples made it through the filter (246 forward files written vs. 258 expected).
# NOTE(review): sample.names still has one entry per row of `out` (258) -- confirm
# it is subset to the surviving samples before it is used alongside these files.
filtFs <- file.path(
  path.cut, "filtered",
  list.files(file.path(path.cut, "filtered"), pattern = "L001_R1_001.fastq", full.names = FALSE)
)
filtRs <- file.path(
  path.cut, "filtered",
  list.files(file.path(path.cut, "filtered"), pattern = "L001_R2_001.fastq", full.names = FALSE)
)
learnErrors() learns the error rates from an input list, or vector, of file names or a list of derep-class objects. Error rate estimation is performed by errorEstimationFunction. The output of this function serves as input to the dada function call as the err parameter.
This uses the reads from the filtered and trimmed files located in the “filtered” folder /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered
# Learn the error model for the forward reads from the filtered forward files.
# You can safely ignore error messages “Not all sequences were the same length.”
errF <- learnErrors(fls = filtFs, multithread = TRUE)
## 100662550 total bases in 366324 reads from 38 samples will be used for learning the error rates.
# Learn the error model for the reverse reads from the filtered reverse files.
errR <- learnErrors(fls = filtRs, multithread = TRUE)
## 100000500 total bases in 355673 reads from 37 samples will be used for learning the error rates.
#explanation of parameters in the learnErrors() function:
#learnErrors(
#fls, <-- fastq files
#nbases = 1e+08, <-- minimum number of total bases to learn error rate
#nreads = NULL, <-- deprecated, don't use
#errorEstimationFunction = loessErrfun,
#multithread = FALSE, <-- if enabled, sets the number of threads
#randomize = FALSE, <-- If FALSE, samples are read in the provided order until enough reads are obtained. If TRUE, samples are picked at random from those provided
#MAX_CONSIST = 10, <--The maximum number of times to step through the self-consistency loop.
#OMEGA_C = 0, <--The threshold at which unique sequences inferred to contain errors are corrected in the final output, and used to estimate the error rates
#qualityType = "Auto", <--The quality encoding of the fastq file(s). "Auto" (the default) means to attempt to auto-detect the encoding.
#verbose = FALSE)
We expect a roughly linear decrease in log-transformed error frequency as the consensus quality score increases from 0 to 40.
# Forward reads: plot the learned error rates, with nominalQ = TRUE.
plotErrors(dq = errF, nominalQ = TRUE)
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
# Reverse reads: plot the learned error rates, with nominalQ = TRUE.
plotErrors(dq = errR, nominalQ = TRUE)
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
The quality profile plot is a gray-scale heatmap of the frequency of each quality score at each base position. The mean quality score at each position is shown by the green line, and the quartiles of the quality score distribution by the orange lines. The red line shows the scaled proportion of reads that extend to at least that position.
# Inspect the quality profile of the first filtered sample, forward then reverse
# (the reverse reads are always worse).
plotQualityProfile(fl = filtFs[1])
plotQualityProfile(fl = filtRs[1])
Dereplication combines all identical sequencing reads into “unique sequences” with a corresponding “abundance” (the number of reads with that same sequence). Dereplication substantially reduces computation time by eliminating redundant comparisons.
DADA2 retains a summary of the quality information associated with each unique sequence. The consensus quality profile of a unique sequence is the average of the positional qualities from the dereplicated reads. These quality profiles inform the error model of the subsequent denoising step, significantly increasing DADA2’s accuracy. (But wasn't the learnErrors step run before dereplication? Yes: dada is the denoising step, and it uses the error model that was created earlier.)
This uses the reads from the filtered and trimmed files located in the “filtered” folder /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered
# Dereplicate the filtered forward reads, printing per-file progress.
derepFs <- derepFastq(fls = filtFs, verbose = TRUE)
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H1_S293_L001_R1_001.fastq
## Encountered 1458 unique sequences from 9241 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H5_S294_L001_R1_001.fastq
## Encountered 1990 unique sequences from 5929 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H6_S295_L001_R1_001.fastq
## Encountered 242 unique sequences from 850 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H2_S296_L001_R1_001.fastq
## Encountered 1916 unique sequences from 4559 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H4_S297_L001_R1_001.fastq
## Encountered 580 unique sequences from 2027 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H8_S298_L001_R1_001.fastq
## Encountered 2426 unique sequences from 15437 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H3_S299_L001_R1_001.fastq
## Encountered 1391 unique sequences from 4021 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H7_S300_L001_R1_001.fastq
## Encountered 1622 unique sequences from 8636 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H9_S301_L001_R1_001.fastq
## Encountered 959 unique sequences from 4778 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H1_S302_L001_R1_001.fastq
## Encountered 3209 unique sequences from 15258 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H5_S303_L001_R1_001.fastq
## Encountered 4372 unique sequences from 17366 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H6_S304_L001_R1_001.fastq
## Encountered 3275 unique sequences from 17405 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H1_S305_L001_R1_001.fastq
## Encountered 2994 unique sequences from 12116 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H5_S306_L001_R1_001.fastq
## Encountered 2674 unique sequences from 12313 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H6_S307_L001_R1_001.fastq
## Encountered 2500 unique sequences from 12755 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H2_S308_L001_R1_001.fastq
## Encountered 355 unique sequences from 851 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H4_S309_L001_R1_001.fastq
## Encountered 166 unique sequences from 350 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H8_S310_L001_R1_001.fastq
## Encountered 2544 unique sequences from 8162 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H3_S311_L001_R1_001.fastq
## Encountered 3350 unique sequences from 16183 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H7_S312_L001_R1_001.fastq
## Encountered 2798 unique sequences from 13260 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H9_S313_L001_R1_001.fastq
## Encountered 2281 unique sequences from 12153 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H2_S314_L001_R1_001.fastq
## Encountered 3378 unique sequences from 13295 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H4_S315_L001_R1_001.fastq
## Encountered 2 unique sequences from 2 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H8_S316_L001_R1_001.fastq
## Encountered 2738 unique sequences from 13616 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-14-H5_S318_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-14-H6_S319_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-15-H8_S322_L001_R1_001.fastq
## Encountered 1485 unique sequences from 8785 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H3_S323_L001_R1_001.fastq
## Encountered 5234 unique sequences from 36464 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H7_S324_L001_R1_001.fastq
## Encountered 1441 unique sequences from 8836 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H9_S325_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H3_S326_L001_R1_001.fastq
## Encountered 1382 unique sequences from 7246 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H7_S327_L001_R1_001.fastq
## Encountered 2149 unique sequences from 9836 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H9_S328_L001_R1_001.fastq
## Encountered 2901 unique sequences from 14023 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-13-H1_S329_L001_R1_001.fastq
## Encountered 2254 unique sequences from 14260 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-13-H3_S330_L001_R1_001.fastq
## Encountered 2637 unique sequences from 11753 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H11_S331_L001_R1_001.fastq
## Encountered 1857 unique sequences from 12176 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H6_S332_L001_R1_001.fastq
## Encountered 2604 unique sequences from 11728 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H7_S333_L001_R1_001.fastq
## Encountered 1602 unique sequences from 10651 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-15-H8_S334_L001_R1_001.fastq
## Encountered 2309 unique sequences from 12915 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H10_S335_L001_R1_001.fastq
## Encountered 2961 unique sequences from 16187 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H12_S336_L001_R1_001.fastq
## Encountered 2412 unique sequences from 17032 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H9_S337_L001_R1_001.fastq
## Encountered 2377 unique sequences from 13821 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H21_S338_L001_R1_001.fastq
## Encountered 2963 unique sequences from 15729 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H22_S339_L001_R1_001.fastq
## Encountered 1811 unique sequences from 19232 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H27_S340_L001_R1_001.fastq
## Encountered 3128 unique sequences from 12550 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H25_S341_L001_R1_001.fastq
## Encountered 1877 unique sequences from 10169 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H26_S342_L001_R1_001.fastq
## Encountered 1527 unique sequences from 6196 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H28_S343_L001_R1_001.fastq
## Encountered 2508 unique sequences from 10332 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H17_S344_L001_R1_001.fastq
## Encountered 1528 unique sequences from 9104 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H23_S345_L001_R1_001.fastq
## Encountered 2284 unique sequences from 11907 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H24_S346_L001_R1_001.fastq
## Encountered 191 unique sequences from 715 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H21_S347_L001_R1_001.fastq
## Encountered 2264 unique sequences from 11218 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H22_S348_L001_R1_001.fastq
## Encountered 1183 unique sequences from 4717 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H27_S349_L001_R1_001.fastq
## Encountered 1069 unique sequences from 4547 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H18_S350_L001_R1_001.fastq
## Encountered 440 unique sequences from 1152 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H25_S351_L001_R1_001.fastq
## Encountered 1090 unique sequences from 2834 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H26_S352_L001_R1_001.fastq
## Encountered 1094 unique sequences from 3400 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-6-H17_S353_L001_R1_001.fastq
## Encountered 1689 unique sequences from 7362 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-6-H24_S354_L001_R1_001.fastq
## Encountered 2915 unique sequences from 11939 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-7-H23_S355_L001_R1_001.fastq
## Encountered 2123 unique sequences from 10457 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-14-H10_S356_L001_R1_001.fastq
## Encountered 2658 unique sequences from 22653 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-14-H12_S357_L001_R1_001.fastq
## Encountered 4 unique sequences from 4 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-20-H27_S358_L001_R1_001.fastq
## Encountered 1642 unique sequences from 9282 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-21-H25_S359_L001_R1_001.fastq
## Encountered 1689 unique sequences from 12630 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-21-H26_S360_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-6-H11_S362_L001_R1_001.fastq
## Encountered 2779 unique sequences from 13905 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-6-H6_S364_L001_R1_001.fastq
## Encountered 3232 unique sequences from 13693 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-7-H8_S366_L001_R1_001.fastq
## Encountered 1829 unique sequences from 15342 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-8-H3_S367_L001_R1_001.fastq
## Encountered 1860 unique sequences from 11288 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H3_S368_L001_R1_001.fastq
## Encountered 1177 unique sequences from 6412 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H5_S369_L001_R1_001.fastq
## Encountered 2436 unique sequences from 14325 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H7_S370_L001_R1_001.fastq
## Encountered 1678 unique sequences from 8964 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H6_S371_L001_R1_001.fastq
## Encountered 2391 unique sequences from 12999 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H8_S372_L001_R1_001.fastq
## Encountered 1918 unique sequences from 8254 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H9_S373_L001_R1_001.fastq
## Encountered 921 unique sequences from 3377 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H3_S374_L001_R1_001.fastq
## Encountered 2373 unique sequences from 15274 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H7_S375_L001_R1_001.fastq
## Encountered 2981 unique sequences from 14310 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H9_S376_L001_R1_001.fastq
## Encountered 2371 unique sequences from 12646 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-16-H5_S377_L001_R1_001.fastq
## Encountered 2142 unique sequences from 11967 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-24-H6_S378_L001_R1_001.fastq
## Encountered 2587 unique sequences from 8763 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-24-H8_S379_L001_R1_001.fastq
## Encountered 2773 unique sequences from 9782 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-25-H2_S380_L001_R1_001.fastq
## Encountered 3801 unique sequences from 13012 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-25-H4_S381_L001_R1_001.fastq
## Encountered 2176 unique sequences from 5350 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-26-H1_S382_L001_R1_001.fastq
## Encountered 2165 unique sequences from 5962 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-26-H7_S383_L001_R1_001.fastq
## Encountered 3472 unique sequences from 10330 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-27-H3_S384_L001_R1_001.fastq
## Encountered 1942 unique sequences from 6278 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-27-H5_S385_L001_R1_001.fastq
## Encountered 3675 unique sequences from 11398 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H1_S386_L001_R1_001.fastq
## Encountered 1906 unique sequences from 9094 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H2_S387_L001_R1_001.fastq
## Encountered 1706 unique sequences from 10848 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H4_S388_L001_R1_001.fastq
## Encountered 2437 unique sequences from 11980 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-9-H2_S389_L001_R1_001.fastq
## Encountered 5129 unique sequences from 23277 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-9-H4_S390_L001_R1_001.fastq
## Encountered 2379 unique sequences from 7104 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-15-H6_S391_L001_R1_001.fastq
## Encountered 3223 unique sequences from 18724 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-16-H4_S392_L001_R1_001.fastq
## Encountered 2230 unique sequences from 13047 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-17-H1_S393_L001_R1_001.fastq
## Encountered 4109 unique sequences from 21557 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-18-H3_S394_L001_R1_001.fastq
## Encountered 2315 unique sequences from 7624 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-18-H7_S395_L001_R1_001.fastq
## Encountered 1583 unique sequences from 8641 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-29-H5_S396_L001_R1_001.fastq
## Encountered 1829 unique sequences from 8843 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-29-H7_S397_L001_R1_001.fastq
## Encountered 3136 unique sequences from 19278 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-30-H8_S398_L001_R1_001.fastq
## Encountered 1142 unique sequences from 8704 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-30-H9_S399_L001_R1_001.fastq
## Encountered 1078 unique sequences from 4403 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-5-H1_S400_L001_R1_001.fastq
## Encountered 1269 unique sequences from 6719 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-5-H2_S401_L001_R1_001.fastq
## Encountered 1042 unique sequences from 4171 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H6_S403_L001_R1_001.fastq
## Encountered 2242 unique sequences from 11259 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H8_S404_L001_R1_001.fastq
## Encountered 3890 unique sequences from 19324 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H9_S405_L001_R1_001.fastq
## Encountered 4267 unique sequences from 22375 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H3_S406_L001_R1_001.fastq
## Encountered 1435 unique sequences from 7103 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H5_S407_L001_R1_001.fastq
## Encountered 1913 unique sequences from 12450 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H7_S408_L001_R1_001.fastq
## Encountered 3427 unique sequences from 22286 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H2_S409_L001_R1_001.fastq
## Encountered 2576 unique sequences from 15351 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H5_S410_L001_R1_001.fastq
## Encountered 3942 unique sequences from 13489 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H6_S411_L001_R1_001.fastq
## Encountered 2186 unique sequences from 11447 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H7_S412_L001_R1_001.fastq
## Encountered 5257 unique sequences from 21306 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H8_S413_L001_R1_001.fastq
## Encountered 2240 unique sequences from 11628 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H9_S414_L001_R1_001.fastq
## Encountered 232 unique sequences from 344 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba001_S415_L001_R1_001.fastq
## Encountered 1936 unique sequences from 11589 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba002_S416_L001_R1_001.fastq
## Encountered 2555 unique sequences from 8068 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba003_S417_L001_R1_001.fastq
## Encountered 1150 unique sequences from 7634 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb001_S418_L001_R1_001.fastq
## Encountered 2073 unique sequences from 13734 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb002_S419_L001_R1_001.fastq
## Encountered 2733 unique sequences from 19049 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb003_S420_L001_R1_001.fastq
## Encountered 4817 unique sequences from 25904 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb004_S421_L001_R1_001.fastq
## Encountered 4831 unique sequences from 23070 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb005_S422_L001_R1_001.fastq
## Encountered 2293 unique sequences from 11519 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb007_S423_L001_R1_001.fastq
## Encountered 2107 unique sequences from 12125 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb008_S424_L001_R1_001.fastq
## Encountered 1172 unique sequences from 6567 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb009_S425_L001_R1_001.fastq
## Encountered 1672 unique sequences from 8599 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb010_S426_L001_R1_001.fastq
## Encountered 1976 unique sequences from 10777 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb011_S427_L001_R1_001.fastq
## Encountered 2442 unique sequences from 8051 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb012_S428_L001_R1_001.fastq
## Encountered 4932 unique sequences from 27231 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb013_S429_L001_R1_001.fastq
## Encountered 1573 unique sequences from 6172 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb014_S430_L001_R1_001.fastq
## Encountered 2922 unique sequences from 15434 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb015_S431_L001_R1_001.fastq
## Encountered 1641 unique sequences from 7061 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb016_S432_L001_R1_001.fastq
## Encountered 1258 unique sequences from 6078 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb017_S433_L001_R1_001.fastq
## Encountered 1724 unique sequences from 7797 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb018_S434_L001_R1_001.fastq
## Encountered 3752 unique sequences from 16225 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb019_S435_L001_R1_001.fastq
## Encountered 2852 unique sequences from 19040 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb020_S436_L001_R1_001.fastq
## Encountered 2300 unique sequences from 20153 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb021_S437_L001_R1_001.fastq
## Encountered 1942 unique sequences from 6625 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb022_S438_L001_R1_001.fastq
## Encountered 2128 unique sequences from 15270 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb023_S439_L001_R1_001.fastq
## Encountered 1651 unique sequences from 8291 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb024_S440_L001_R1_001.fastq
## Encountered 2620 unique sequences from 15599 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb025_S441_L001_R1_001.fastq
## Encountered 1501 unique sequences from 9402 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf001_S442_L001_R1_001.fastq
## Encountered 3433 unique sequences from 18214 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf002_S443_L001_R1_001.fastq
## Encountered 3 unique sequences from 4 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf003_S444_L001_R1_001.fastq
## Encountered 1932 unique sequences from 10825 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf004_S445_L001_R1_001.fastq
## Encountered 1389 unique sequences from 5494 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg001_S446_L001_R1_001.fastq
## Encountered 1199 unique sequences from 6774 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg002_S447_L001_R1_001.fastq
## Encountered 2431 unique sequences from 9593 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg003_S448_L001_R1_001.fastq
## Encountered 2390 unique sequences from 9878 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg004_S449_L001_R1_001.fastq
## Encountered 1476 unique sequences from 8208 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg005_S450_L001_R1_001.fastq
## Encountered 3025 unique sequences from 11754 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg006_S451_L001_R1_001.fastq
## Encountered 2148 unique sequences from 14258 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg007_S452_L001_R1_001.fastq
## Encountered 1728 unique sequences from 13163 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg008_S453_L001_R1_001.fastq
## Encountered 2815 unique sequences from 12164 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg009_S454_L001_R1_001.fastq
## Encountered 2005 unique sequences from 5238 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg010_S455_L001_R1_001.fastq
## Encountered 2092 unique sequences from 6285 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg011_S456_L001_R1_001.fastq
## Encountered 2397 unique sequences from 10184 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg012_S457_L001_R1_001.fastq
## Encountered 1236 unique sequences from 7157 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg013_S458_L001_R1_001.fastq
## Encountered 1739 unique sequences from 12665 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg014_S459_L001_R1_001.fastq
## Encountered 1893 unique sequences from 12895 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg015_S460_L001_R1_001.fastq
## Encountered 1793 unique sequences from 6987 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg016_S461_L001_R1_001.fastq
## Encountered 1522 unique sequences from 5489 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg017_S462_L001_R1_001.fastq
## Encountered 1124 unique sequences from 5605 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg018_S463_L001_R1_001.fastq
## Encountered 1468 unique sequences from 6740 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg019_S464_L001_R1_001.fastq
## Encountered 1534 unique sequences from 8707 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi001_S465_L001_R1_001.fastq
## Encountered 1691 unique sequences from 10410 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi002_S466_L001_R1_001.fastq
## Encountered 2813 unique sequences from 13359 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi003_S467_L001_R1_001.fastq
## Encountered 4425 unique sequences from 20553 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi004_S468_L001_R1_001.fastq
## Encountered 3192 unique sequences from 18623 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi005_S469_L001_R1_001.fastq
## Encountered 36 unique sequences from 149 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi006_S470_L001_R1_001.fastq
## Encountered 1268 unique sequences from 4554 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi007_S471_L001_R1_001.fastq
## Encountered 2803 unique sequences from 9976 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-CKC0001_S472_L001_R1_001.fastq
## Encountered 1046 unique sequences from 7833 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ESE0004_S473_L001_R1_001.fastq
## Encountered 1588 unique sequences from 9116 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20230909_S474_L001_R1_001.fastq
## Encountered 2 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20231007_S477_L001_R1_001.fastq
## Encountered 3 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20231008_S478_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024220A_S480_L001_R1_001.fastq
## Encountered 26 unique sequences from 37 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024220B_S481_L001_R1_001.fastq
## Encountered 518 unique sequences from 2165 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024221A_S482_L001_R1_001.fastq
## Encountered 686 unique sequences from 2878 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024221B_S483_L001_R1_001.fastq
## Encountered 151 unique sequences from 622 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024222A_S484_L001_R1_001.fastq
## Encountered 198 unique sequences from 823 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024222B_S485_L001_R1_001.fastq
## Encountered 1232 unique sequences from 6313 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024312A_S486_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024312B_S487_L001_R1_001.fastq
## Encountered 6 unique sequences from 6 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024314A_S488_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024314B_S489_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024319_S490_L001_R1_001.fastq
## Encountered 43 unique sequences from 180 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0007_S492_L001_R1_001.fastq
## Encountered 2322 unique sequences from 8407 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0027_S494_L001_R1_001.fastq
## Encountered 1596 unique sequences from 7425 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0044_S495_L001_R1_001.fastq
## Encountered 286 unique sequences from 941 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0045_S496_L001_R1_001.fastq
## Encountered 2717 unique sequences from 16853 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0052_S497_L001_R1_001.fastq
## Encountered 254 unique sequences from 1057 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0054_S498_L001_R1_001.fastq
## Encountered 179 unique sequences from 991 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0055_S499_L001_R1_001.fastq
## Encountered 233 unique sequences from 1345 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0071_S500_L001_R1_001.fastq
## Encountered 1254 unique sequences from 10060 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0095_S501_L001_R1_001.fastq
## Encountered 2376 unique sequences from 18665 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0096_S502_L001_R1_001.fastq
## Encountered 1976 unique sequences from 11780 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0105_S503_L001_R1_001.fastq
## Encountered 2328 unique sequences from 20059 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0106_S504_L001_R1_001.fastq
## Encountered 2956 unique sequences from 13590 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0119_S505_L001_R1_001.fastq
## Encountered 1553 unique sequences from 10904 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0134_S506_L001_R1_001.fastq
## Encountered 1712 unique sequences from 10331 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0135_S507_L001_R1_001.fastq
## Encountered 452 unique sequences from 1762 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0136_S508_L001_R1_001.fastq
## Encountered 2012 unique sequences from 10072 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0137_S509_L001_R1_001.fastq
## Encountered 4607 unique sequences from 21450 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0138_S510_L001_R1_001.fastq
## Encountered 1797 unique sequences from 10763 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0139_S511_L001_R1_001.fastq
## Encountered 2508 unique sequences from 10490 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0150_S512_L001_R1_001.fastq
## Encountered 2725 unique sequences from 13943 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0153_S513_L001_R1_001.fastq
## Encountered 1076 unique sequences from 3156 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0155_S514_L001_R1_001.fastq
## Encountered 2738 unique sequences from 15382 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0156_S515_L001_R1_001.fastq
## Encountered 4653 unique sequences from 27268 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0159_S516_L001_R1_001.fastq
## Encountered 3951 unique sequences from 19126 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0163_S517_L001_R1_001.fastq
## Encountered 2418 unique sequences from 8259 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0165_S518_L001_R1_001.fastq
## Encountered 2021 unique sequences from 7382 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0167_S519_L001_R1_001.fastq
## Encountered 2582 unique sequences from 12356 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0168_S520_L001_R1_001.fastq
## Encountered 1500 unique sequences from 9986 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0169_S521_L001_R1_001.fastq
## Encountered 4242 unique sequences from 18064 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0170_S522_L001_R1_001.fastq
## Encountered 2102 unique sequences from 9654 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0200_S523_L001_R1_001.fastq
## Encountered 1968 unique sequences from 14566 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0201_S524_L001_R1_001.fastq
## Encountered 4120 unique sequences from 23716 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0205_S525_L001_R1_001.fastq
## Encountered 3134 unique sequences from 12367 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0209_S526_L001_R1_001.fastq
## Encountered 685 unique sequences from 2911 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0221_S527_L001_R1_001.fastq
## Encountered 2247 unique sequences from 8200 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0224_S528_L001_R1_001.fastq
## Encountered 1547 unique sequences from 11029 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0225_S529_L001_R1_001.fastq
## Encountered 3266 unique sequences from 15454 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0227_S530_L001_R1_001.fastq
## Encountered 1554 unique sequences from 9704 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0241_S531_L001_R1_001.fastq
## Encountered 2935 unique sequences from 13922 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0244_S532_L001_R1_001.fastq
## Encountered 3025 unique sequences from 12161 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0246_S533_L001_R1_001.fastq
## Encountered 1672 unique sequences from 5407 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0248_S534_L001_R1_001.fastq
## Encountered 4225 unique sequences from 23791 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0253_S535_L001_R1_001.fastq
## Encountered 2633 unique sequences from 14833 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0254_S536_L001_R1_001.fastq
## Encountered 2234 unique sequences from 9069 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0256_S493_L001_R1_001.fastq
## Encountered 5437 unique sequences from 26403 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0259_S537_L001_R1_001.fastq
## Encountered 2285 unique sequences from 14649 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0263_S538_L001_R1_001.fastq
## Encountered 413 unique sequences from 1481 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0266_S539_L001_R1_001.fastq
## Encountered 294 unique sequences from 896 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0272_S540_L001_R1_001.fastq
## Encountered 506 unique sequences from 977 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-crtl-20240417_S541_L001_R1_001.fastq
## Encountered 599 unique sequences from 2444 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240409_S542_L001_R1_001.fastq
## Encountered 111 unique sequences from 462 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240418A_S543_L001_R1_001.fastq
## Encountered 2234 unique sequences from 8900 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240418B_S544_L001_R1_001.fastq
## Encountered 21 unique sequences from 43 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240523_S545_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-rbcL-pcr-neg-ctrl-20231021-20231119_S548_L001_R1_001.fastq
## Encountered 3 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0009_S551_L001_R1_001.fastq
## Encountered 3186 unique sequences from 18374 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0010_S552_L001_R1_001.fastq
## Encountered 2303 unique sequences from 13370 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0013_S553_L001_R1_001.fastq
## Encountered 3542 unique sequences from 18429 total sequences read.
# Dereplicate the filtered reverse (R2) reads listed in `filtRs`.
# With verbose = TRUE, derepFastq() reports the unique/total read counts
# for every file as it is processed.
# NOTE(review): the log for this call shows a few files with only 1-4
# reads; confirm that later steps are expected to handle such samples.
derepRs <- derepFastq(
  fls = filtRs,
  verbose = TRUE
)
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H1_S293_L001_R2_001.fastq
## Encountered 3205 unique sequences from 9241 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H5_S294_L001_R2_001.fastq
## Encountered 3022 unique sequences from 5929 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H6_S295_L001_R2_001.fastq
## Encountered 397 unique sequences from 850 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H2_S296_L001_R2_001.fastq
## Encountered 2719 unique sequences from 4559 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H4_S297_L001_R2_001.fastq
## Encountered 930 unique sequences from 2027 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H8_S298_L001_R2_001.fastq
## Encountered 4895 unique sequences from 15437 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H3_S299_L001_R2_001.fastq
## Encountered 2167 unique sequences from 4021 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H7_S300_L001_R2_001.fastq
## Encountered 3009 unique sequences from 8636 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H9_S301_L001_R2_001.fastq
## Encountered 1876 unique sequences from 4778 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H1_S302_L001_R2_001.fastq
## Encountered 5820 unique sequences from 15258 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H5_S303_L001_R2_001.fastq
## Encountered 7596 unique sequences from 17366 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H6_S304_L001_R2_001.fastq
## Encountered 6427 unique sequences from 17405 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H1_S305_L001_R2_001.fastq
## Encountered 4939 unique sequences from 12116 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H5_S306_L001_R2_001.fastq
## Encountered 4950 unique sequences from 12313 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H6_S307_L001_R2_001.fastq
## Encountered 4654 unique sequences from 12755 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H2_S308_L001_R2_001.fastq
## Encountered 517 unique sequences from 851 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H4_S309_L001_R2_001.fastq
## Encountered 234 unique sequences from 350 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H8_S310_L001_R2_001.fastq
## Encountered 4123 unique sequences from 8162 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H3_S311_L001_R2_001.fastq
## Encountered 5848 unique sequences from 16183 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H7_S312_L001_R2_001.fastq
## Encountered 5067 unique sequences from 13260 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H9_S313_L001_R2_001.fastq
## Encountered 4305 unique sequences from 12153 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H2_S314_L001_R2_001.fastq
## Encountered 5534 unique sequences from 13295 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H4_S315_L001_R2_001.fastq
## Encountered 2 unique sequences from 2 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H8_S316_L001_R2_001.fastq
## Encountered 4682 unique sequences from 13616 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-14-H5_S318_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-14-H6_S319_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-15-H8_S322_L001_R2_001.fastq
## Encountered 3038 unique sequences from 8785 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H3_S323_L001_R2_001.fastq
## Encountered 11402 unique sequences from 36464 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H7_S324_L001_R2_001.fastq
## Encountered 2804 unique sequences from 8836 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H9_S325_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H3_S326_L001_R2_001.fastq
## Encountered 2495 unique sequences from 7246 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H7_S327_L001_R2_001.fastq
## Encountered 4020 unique sequences from 9836 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H9_S328_L001_R2_001.fastq
## Encountered 5076 unique sequences from 14023 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-13-H1_S329_L001_R2_001.fastq
## Encountered 4494 unique sequences from 14260 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-13-H3_S330_L001_R2_001.fastq
## Encountered 4657 unique sequences from 11753 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H11_S331_L001_R2_001.fastq
## Encountered 3597 unique sequences from 12176 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H6_S332_L001_R2_001.fastq
## Encountered 4682 unique sequences from 11728 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H7_S333_L001_R2_001.fastq
## Encountered 3150 unique sequences from 10651 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-15-H8_S334_L001_R2_001.fastq
## Encountered 4269 unique sequences from 12915 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H10_S335_L001_R2_001.fastq
## Encountered 5422 unique sequences from 16187 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H12_S336_L001_R2_001.fastq
## Encountered 5073 unique sequences from 17032 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H9_S337_L001_R2_001.fastq
## Encountered 4537 unique sequences from 13821 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H21_S338_L001_R2_001.fastq
## Encountered 5712 unique sequences from 15729 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H22_S339_L001_R2_001.fastq
## Encountered 4554 unique sequences from 19232 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H27_S340_L001_R2_001.fastq
## Encountered 5336 unique sequences from 12550 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H25_S341_L001_R2_001.fastq
## Encountered 3726 unique sequences from 10169 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H26_S342_L001_R2_001.fastq
## Encountered 2640 unique sequences from 6196 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H28_S343_L001_R2_001.fastq
## Encountered 4159 unique sequences from 10332 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H17_S344_L001_R2_001.fastq
## Encountered 3139 unique sequences from 9104 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H23_S345_L001_R2_001.fastq
## Encountered 4338 unique sequences from 11907 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H24_S346_L001_R2_001.fastq
## Encountered 329 unique sequences from 715 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H21_S347_L001_R2_001.fastq
## Encountered 4133 unique sequences from 11218 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H22_S348_L001_R2_001.fastq
## Encountered 2153 unique sequences from 4717 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H27_S349_L001_R2_001.fastq
## Encountered 1819 unique sequences from 4547 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H18_S350_L001_R2_001.fastq
## Encountered 674 unique sequences from 1152 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H25_S351_L001_R2_001.fastq
## Encountered 1686 unique sequences from 2834 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H26_S352_L001_R2_001.fastq
## Encountered 1664 unique sequences from 3400 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-6-H17_S353_L001_R2_001.fastq
## Encountered 3037 unique sequences from 7362 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-6-H24_S354_L001_R2_001.fastq
## Encountered 4853 unique sequences from 11939 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-7-H23_S355_L001_R2_001.fastq
## Encountered 3897 unique sequences from 10457 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-14-H10_S356_L001_R2_001.fastq
## Encountered 5895 unique sequences from 22653 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-14-H12_S357_L001_R2_001.fastq
## Encountered 4 unique sequences from 4 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-20-H27_S358_L001_R2_001.fastq
## Encountered 3372 unique sequences from 9282 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-21-H25_S359_L001_R2_001.fastq
## Encountered 3592 unique sequences from 12630 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-21-H26_S360_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-6-H11_S362_L001_R2_001.fastq
## Encountered 4992 unique sequences from 13905 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-6-H6_S364_L001_R2_001.fastq
## Encountered 5478 unique sequences from 13693 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-7-H8_S366_L001_R2_001.fastq
## Encountered 4123 unique sequences from 15342 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-8-H3_S367_L001_R2_001.fastq
## Encountered 3581 unique sequences from 11288 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H3_S368_L001_R2_001.fastq
## Encountered 2311 unique sequences from 6412 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H5_S369_L001_R2_001.fastq
## Encountered 4893 unique sequences from 14325 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H7_S370_L001_R2_001.fastq
## Encountered 3148 unique sequences from 8964 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H6_S371_L001_R2_001.fastq
## Encountered 4394 unique sequences from 12999 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H8_S372_L001_R2_001.fastq
## Encountered 3207 unique sequences from 8254 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H9_S373_L001_R2_001.fastq
## Encountered 1387 unique sequences from 3377 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H3_S374_L001_R2_001.fastq
## Encountered 4315 unique sequences from 15274 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H7_S375_L001_R2_001.fastq
## Encountered 5203 unique sequences from 14310 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H9_S376_L001_R2_001.fastq
## Encountered 4089 unique sequences from 12646 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-16-H5_S377_L001_R2_001.fastq
## Encountered 3959 unique sequences from 11967 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-24-H6_S378_L001_R2_001.fastq
## Encountered 3968 unique sequences from 8763 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-24-H8_S379_L001_R2_001.fastq
## Encountered 4212 unique sequences from 9782 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-25-H2_S380_L001_R2_001.fastq
## Encountered 5800 unique sequences from 13012 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-25-H4_S381_L001_R2_001.fastq
## Encountered 2997 unique sequences from 5350 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-26-H1_S382_L001_R2_001.fastq
## Encountered 3006 unique sequences from 5962 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-26-H7_S383_L001_R2_001.fastq
## Encountered 4938 unique sequences from 10330 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-27-H3_S384_L001_R2_001.fastq
## Encountered 2935 unique sequences from 6278 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-27-H5_S385_L001_R2_001.fastq
## Encountered 5351 unique sequences from 11398 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H1_S386_L001_R2_001.fastq
## Encountered 3532 unique sequences from 9094 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H2_S387_L001_R2_001.fastq
## Encountered 3028 unique sequences from 10848 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H4_S388_L001_R2_001.fastq
## Encountered 4611 unique sequences from 11980 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-9-H2_S389_L001_R2_001.fastq
## Encountered 8967 unique sequences from 23277 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-9-H4_S390_L001_R2_001.fastq
## Encountered 3559 unique sequences from 7104 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-15-H6_S391_L001_R2_001.fastq
## Encountered 6250 unique sequences from 18724 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-16-H4_S392_L001_R2_001.fastq
## Encountered 4292 unique sequences from 13047 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-17-H1_S393_L001_R2_001.fastq
## Encountered 7708 unique sequences from 21557 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-18-H3_S394_L001_R2_001.fastq
## Encountered 3855 unique sequences from 7624 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-18-H7_S395_L001_R2_001.fastq
## Encountered 3019 unique sequences from 8641 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-29-H5_S396_L001_R2_001.fastq
## Encountered 3345 unique sequences from 8843 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-29-H7_S397_L001_R2_001.fastq
## Encountered 6103 unique sequences from 19278 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-30-H8_S398_L001_R2_001.fastq
## Encountered 2558 unique sequences from 8704 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-30-H9_S399_L001_R2_001.fastq
## Encountered 1984 unique sequences from 4403 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-5-H1_S400_L001_R2_001.fastq
## Encountered 2405 unique sequences from 6719 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-5-H2_S401_L001_R2_001.fastq
## Encountered 1796 unique sequences from 4171 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H6_S403_L001_R2_001.fastq
## Encountered 4222 unique sequences from 11259 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H8_S404_L001_R2_001.fastq
## Encountered 7038 unique sequences from 19324 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H9_S405_L001_R2_001.fastq
## Encountered 7783 unique sequences from 22375 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H3_S406_L001_R2_001.fastq
## Encountered 2745 unique sequences from 7103 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H5_S407_L001_R2_001.fastq
## Encountered 3779 unique sequences from 12450 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H7_S408_L001_R2_001.fastq
## Encountered 7183 unique sequences from 22286 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H2_S409_L001_R2_001.fastq
## Encountered 4948 unique sequences from 15351 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H5_S410_L001_R2_001.fastq
## Encountered 6095 unique sequences from 13489 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H6_S411_L001_R2_001.fastq
## Encountered 4031 unique sequences from 11447 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H7_S412_L001_R2_001.fastq
## Encountered 9038 unique sequences from 21306 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H8_S413_L001_R2_001.fastq
## Encountered 4366 unique sequences from 11628 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H9_S414_L001_R2_001.fastq
## Encountered 269 unique sequences from 344 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba001_S415_L001_R2_001.fastq
## Encountered 3906 unique sequences from 11589 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba002_S416_L001_R2_001.fastq
## Encountered 3979 unique sequences from 8068 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba003_S417_L001_R2_001.fastq
## Encountered 2369 unique sequences from 7634 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb001_S418_L001_R2_001.fastq
## Encountered 4114 unique sequences from 13734 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb002_S419_L001_R2_001.fastq
## Encountered 5382 unique sequences from 19049 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb003_S420_L001_R2_001.fastq
## Encountered 8722 unique sequences from 25904 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb004_S421_L001_R2_001.fastq
## Encountered 8655 unique sequences from 23070 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb005_S422_L001_R2_001.fastq
## Encountered 4170 unique sequences from 11519 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb007_S423_L001_R2_001.fastq
## Encountered 4062 unique sequences from 12125 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb008_S424_L001_R2_001.fastq
## Encountered 2318 unique sequences from 6567 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb009_S425_L001_R2_001.fastq
## Encountered 3125 unique sequences from 8599 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb010_S426_L001_R2_001.fastq
## Encountered 3905 unique sequences from 10777 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb011_S427_L001_R2_001.fastq
## Encountered 4056 unique sequences from 8051 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb012_S428_L001_R2_001.fastq
## Encountered 9202 unique sequences from 27231 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb013_S429_L001_R2_001.fastq
## Encountered 2889 unique sequences from 6172 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb014_S430_L001_R2_001.fastq
## Encountered 5348 unique sequences from 15434 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb015_S431_L001_R2_001.fastq
## Encountered 2924 unique sequences from 7061 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb016_S432_L001_R2_001.fastq
## Encountered 2203 unique sequences from 6078 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb017_S433_L001_R2_001.fastq
## Encountered 3042 unique sequences from 7797 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb018_S434_L001_R2_001.fastq
## Encountered 6408 unique sequences from 16225 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb019_S435_L001_R2_001.fastq
## Encountered 5803 unique sequences from 19040 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb020_S436_L001_R2_001.fastq
## Encountered 5081 unique sequences from 20153 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb021_S437_L001_R2_001.fastq
## Encountered 3200 unique sequences from 6625 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb022_S438_L001_R2_001.fastq
## Encountered 4318 unique sequences from 15270 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb023_S439_L001_R2_001.fastq
## Encountered 2922 unique sequences from 8291 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb024_S440_L001_R2_001.fastq
## Encountered 4733 unique sequences from 15599 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb025_S441_L001_R2_001.fastq
## Encountered 3079 unique sequences from 9402 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf001_S442_L001_R2_001.fastq
## Encountered 6606 unique sequences from 18214 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf002_S443_L001_R2_001.fastq
## Encountered 3 unique sequences from 4 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf003_S444_L001_R2_001.fastq
## Encountered 3745 unique sequences from 10825 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf004_S445_L001_R2_001.fastq
## Encountered 2303 unique sequences from 5494 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg001_S446_L001_R2_001.fastq
## Encountered 2221 unique sequences from 6774 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg002_S447_L001_R2_001.fastq
## Encountered 4217 unique sequences from 9593 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg003_S448_L001_R2_001.fastq
## Encountered 4455 unique sequences from 9878 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg004_S449_L001_R2_001.fastq
## Encountered 2797 unique sequences from 8208 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg005_S450_L001_R2_001.fastq
## Encountered 5003 unique sequences from 11754 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg006_S451_L001_R2_001.fastq
## Encountered 4574 unique sequences from 14258 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg007_S452_L001_R2_001.fastq
## Encountered 4175 unique sequences from 13163 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg008_S453_L001_R2_001.fastq
## Encountered 4859 unique sequences from 12164 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg009_S454_L001_R2_001.fastq
## Encountered 2931 unique sequences from 5238 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg010_S455_L001_R2_001.fastq
## Encountered 3208 unique sequences from 6285 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg011_S456_L001_R2_001.fastq
## Encountered 4042 unique sequences from 10184 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg012_S457_L001_R2_001.fastq
## Encountered 2512 unique sequences from 7157 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg013_S458_L001_R2_001.fastq
## Encountered 3832 unique sequences from 12665 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg014_S459_L001_R2_001.fastq
## Encountered 3482 unique sequences from 12895 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg015_S460_L001_R2_001.fastq
## Encountered 3024 unique sequences from 6987 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg016_S461_L001_R2_001.fastq
## Encountered 2581 unique sequences from 5489 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg017_S462_L001_R2_001.fastq
## Encountered 2079 unique sequences from 5605 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg018_S463_L001_R2_001.fastq
## Encountered 2742 unique sequences from 6740 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg019_S464_L001_R2_001.fastq
## Encountered 3018 unique sequences from 8707 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi001_S465_L001_R2_001.fastq
## Encountered 3309 unique sequences from 10410 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi002_S466_L001_R2_001.fastq
## Encountered 4944 unique sequences from 13359 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi003_S467_L001_R2_001.fastq
## Encountered 7625 unique sequences from 20553 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi004_S468_L001_R2_001.fastq
## Encountered 6359 unique sequences from 18623 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi005_S469_L001_R2_001.fastq
## Encountered 62 unique sequences from 149 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi006_S470_L001_R2_001.fastq
## Encountered 2038 unique sequences from 4554 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi007_S471_L001_R2_001.fastq
## Encountered 4492 unique sequences from 9976 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-CKC0001_S472_L001_R2_001.fastq
## Encountered 2291 unique sequences from 7833 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ESE0004_S473_L001_R2_001.fastq
## Encountered 3182 unique sequences from 9116 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20230909_S474_L001_R2_001.fastq
## Encountered 1 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20231007_S477_L001_R2_001.fastq
## Encountered 3 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20231008_S478_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024220A_S480_L001_R2_001.fastq
## Encountered 25 unique sequences from 37 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024220B_S481_L001_R2_001.fastq
## Encountered 969 unique sequences from 2165 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024221A_S482_L001_R2_001.fastq
## Encountered 1253 unique sequences from 2878 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024221B_S483_L001_R2_001.fastq
## Encountered 281 unique sequences from 622 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024222A_S484_L001_R2_001.fastq
## Encountered 411 unique sequences from 823 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024222B_S485_L001_R2_001.fastq
## Encountered 2420 unique sequences from 6313 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024312A_S486_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024312B_S487_L001_R2_001.fastq
## Encountered 6 unique sequences from 6 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024314A_S488_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024314B_S489_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024319_S490_L001_R2_001.fastq
## Encountered 84 unique sequences from 180 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0007_S492_L001_R2_001.fastq
## Encountered 3794 unique sequences from 8407 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0027_S494_L001_R2_001.fastq
## Encountered 3080 unique sequences from 7425 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0044_S495_L001_R2_001.fastq
## Encountered 528 unique sequences from 941 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0045_S496_L001_R2_001.fastq
## Encountered 5511 unique sequences from 16853 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0052_S497_L001_R2_001.fastq
## Encountered 534 unique sequences from 1057 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0054_S498_L001_R2_001.fastq
## Encountered 368 unique sequences from 991 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0055_S499_L001_R2_001.fastq
## Encountered 536 unique sequences from 1345 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0071_S500_L001_R2_001.fastq
## Encountered 2765 unique sequences from 10060 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0095_S501_L001_R2_001.fastq
## Encountered 5254 unique sequences from 18665 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0096_S502_L001_R2_001.fastq
## Encountered 3729 unique sequences from 11780 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0105_S503_L001_R2_001.fastq
## Encountered 5068 unique sequences from 20059 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0106_S504_L001_R2_001.fastq
## Encountered 5117 unique sequences from 13590 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0119_S505_L001_R2_001.fastq
## Encountered 3283 unique sequences from 10904 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0134_S506_L001_R2_001.fastq
## Encountered 3548 unique sequences from 10331 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0135_S507_L001_R2_001.fastq
## Encountered 773 unique sequences from 1762 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0136_S508_L001_R2_001.fastq
## Encountered 3781 unique sequences from 10072 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0137_S509_L001_R2_001.fastq
## Encountered 8283 unique sequences from 21450 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0138_S510_L001_R2_001.fastq
## Encountered 3769 unique sequences from 10763 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0139_S511_L001_R2_001.fastq
## Encountered 4373 unique sequences from 10490 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0150_S512_L001_R2_001.fastq
## Encountered 5095 unique sequences from 13943 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0153_S513_L001_R2_001.fastq
## Encountered 1536 unique sequences from 3156 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0155_S514_L001_R2_001.fastq
## Encountered 4967 unique sequences from 15382 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0156_S515_L001_R2_001.fastq
## Encountered 9150 unique sequences from 27268 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0159_S516_L001_R2_001.fastq
## Encountered 7054 unique sequences from 19126 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0163_S517_L001_R2_001.fastq
## Encountered 3652 unique sequences from 8259 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0165_S518_L001_R2_001.fastq
## Encountered 3258 unique sequences from 7382 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0167_S519_L001_R2_001.fastq
## Encountered 4721 unique sequences from 12356 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0168_S520_L001_R2_001.fastq
## Encountered 2768 unique sequences from 9986 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0169_S521_L001_R2_001.fastq
## Encountered 7078 unique sequences from 18064 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0170_S522_L001_R2_001.fastq
## Encountered 3738 unique sequences from 9654 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0200_S523_L001_R2_001.fastq
## Encountered 3915 unique sequences from 14566 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0201_S524_L001_R2_001.fastq
## Encountered 7435 unique sequences from 23716 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0205_S525_L001_R2_001.fastq
## Encountered 4685 unique sequences from 12367 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0209_S526_L001_R2_001.fastq
## Encountered 1181 unique sequences from 2911 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0221_S527_L001_R2_001.fastq
## Encountered 3655 unique sequences from 8200 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0224_S528_L001_R2_001.fastq
## Encountered 3144 unique sequences from 11029 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0225_S529_L001_R2_001.fastq
## Encountered 5684 unique sequences from 15454 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0227_S530_L001_R2_001.fastq
## Encountered 2821 unique sequences from 9704 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0241_S531_L001_R2_001.fastq
## Encountered 5510 unique sequences from 13922 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0244_S532_L001_R2_001.fastq
## Encountered 5029 unique sequences from 12161 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0246_S533_L001_R2_001.fastq
## Encountered 2729 unique sequences from 5407 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0248_S534_L001_R2_001.fastq
## Encountered 8228 unique sequences from 23791 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0253_S535_L001_R2_001.fastq
## Encountered 4999 unique sequences from 14833 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0254_S536_L001_R2_001.fastq
## Encountered 3663 unique sequences from 9069 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0256_S493_L001_R2_001.fastq
## Encountered 8791 unique sequences from 26403 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0259_S537_L001_R2_001.fastq
## Encountered 4608 unique sequences from 14649 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0263_S538_L001_R2_001.fastq
## Encountered 673 unique sequences from 1481 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0266_S539_L001_R2_001.fastq
## Encountered 469 unique sequences from 896 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0272_S540_L001_R2_001.fastq
## Encountered 688 unique sequences from 977 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-crtl-20240417_S541_L001_R2_001.fastq
## Encountered 1046 unique sequences from 2444 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240409_S542_L001_R2_001.fastq
## Encountered 194 unique sequences from 462 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240418A_S543_L001_R2_001.fastq
## Encountered 3841 unique sequences from 8900 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240418B_S544_L001_R2_001.fastq
## Encountered 21 unique sequences from 43 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240523_S545_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-rbcL-pcr-neg-ctrl-20231021-20231119_S548_L001_R2_001.fastq
## Encountered 2 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0009_S551_L001_R2_001.fastq
## Encountered 6199 unique sequences from 18374 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0010_S552_L001_R2_001.fastq
## Encountered 4800 unique sequences from 13370 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0013_S553_L001_R2_001.fastq
## Encountered 6859 unique sequences from 18429 total sequences read.
#Note that the dereplicated sequences only exist in the R environment, and are not saved into a separate output subdirectory
Extract sample names from filtFs (so that only samples that passed the previous filter are included)
# The file names carry extra text on both ends: a marker prefix ("rbcL-") in
# front and the sequencer suffix ("_S..._L001_R1_001.fastq") behind.
# Step 1: drop the directory part of the path
basename(filtFs[241])
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418B_S544_L001_R1_001.fastq"
# Step 2: split at "_S" to separate the name from the sequencer suffix
strsplit(basename(filtFs[241]), split = "_S")
## [[1]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418B" "544_L001_R1_001.fastq"
# Step 3: keep only the piece in front of the suffix
strsplit(basename(filtFs[241]), split = "_S")[[1]][1]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418B"
# Step 4: break that piece into its hyphen-delimited tokens
strsplit(strsplit(basename(filtFs[241]), split = "_S")[[1]][1], split = "-")[[1]]
## [1] "rbcL" "pcr" "rbcL" "neg" "ctrl" "20240418B"
# The number of tokens differs between worker samples, queen samples,
# extraction negative controls, and pcr negative controls, so drop only the
# first token (the marker prefix) and glue the remainder together with "_".
paste(strsplit(strsplit(basename(filtFs[241]), split = "_S")[[1]][1], split = "-")[[1]][-1], collapse = "_")
## [1] "pcr_rbcL_neg_ctrl_20240418B"
paste(strsplit(strsplit(basename(filtFs[226]), split = "_S")[[1]][1], split = "-")[[1]][-1], collapse = "_")
## [1] "KLS0227"
paste(strsplit(strsplit(basename(filtFs[176]), split = "_S")[[1]][1], split = "-")[[1]][-1], collapse = "_")
## [1] "ext_neg_ctrl_20231007"
paste(strsplit(strsplit(basename(filtFs[1]), split = "_S")[[1]][1], split = "-")[[1]][-1], collapse = "_")
## [1] "2020_6_16_H1"
# Derive a sample name from the path of one fastq file.
#
# Steps (same result as the manual walk-through above for these files):
#   1. take the file name without its directory;
#   2. remove the sequencer suffix "_S<n>_L<n>_R<1|2>_<n>.fastq[.gz]";
#   3. split what is left on "-" and drop the first token (the marker prefix);
#   4. join the remaining tokens with "_".
#
# Args:
#   fname: character path to a fastq file; only the first element is used.
# Returns:
#   A length-1 character vector ("" when nothing follows the first token).
get.sample.name <- function(fname) {
  file_name <- basename(fname[1])
  # Anchor on the complete trailing suffix so that a sample name which itself
  # contains "_S" is not cut short at that earlier occurrence.
  illumina_suffix <- "_S[0-9]+_L[0-9]+_R[12]_[0-9]+\\.(fastq|fq)(\\.gz)?$"
  if (grepl(illumina_suffix, file_name)) {
    stem <- sub(illumina_suffix, "", file_name)
  } else {
    # File name does not end in the expected suffix: fall back to cutting at
    # the first "_S", as before.
    stem <- strsplit(file_name, "_S")[[1]][1]
  }
  tokens <- strsplit(stem, "-")[[1]]
  paste(tokens[-1], collapse = "_")
}
# Build one sample name per filtered forward-read file. vapply() always returns
# a plain, unnamed character vector (sapply() would return a list if filtFs
# were empty) and errors if any file does not yield exactly one name.
sample.names <- vapply(filtFs, get.sample.name, character(1), USE.NAMES = FALSE)
head(sample.names)
## [1] "2020_6_16_H1" "2020_6_16_H5" "2020_6_16_H6" "2020_6_17_H2" "2020_6_17_H4"
## [6] "2020_6_17_H8"
length(sample.names)
## [1] 246
# Name the dereplicated class objects by the sample names
# NOTE(review): the names derived from the forward files are reused for
# derepRs, which assumes derepRs is in the same sample order as derepFs - confirm
names(derepFs) <- sample.names
names(derepRs) <- sample.names
At this step, the core sample inference algorithm is applied to the dereplicated sequences from /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered (remember that the dereplicated sequences only exist in the R environment)
DADA2 infers sample sequences exactly and resolves differences of as little as 1 nucleotide using the models of the error rates we learned in the previous step
# Sample inference on the forward reads: run dada() on the named list of
# dereplicated forward samples with the forward error model (errF)
dadaFs <- dada(derep = derepFs, err = errF, multithread = TRUE)
## Sample 1 - 9241 reads in 1458 unique sequences.
## Sample 2 - 5929 reads in 1990 unique sequences.
## Sample 3 - 850 reads in 242 unique sequences.
## Sample 4 - 4559 reads in 1916 unique sequences.
## Sample 5 - 2027 reads in 580 unique sequences.
## Sample 6 - 15437 reads in 2426 unique sequences.
## Sample 7 - 4021 reads in 1391 unique sequences.
## Sample 8 - 8636 reads in 1622 unique sequences.
## Sample 9 - 4778 reads in 959 unique sequences.
## Sample 10 - 15258 reads in 3209 unique sequences.
## Sample 11 - 17366 reads in 4372 unique sequences.
## Sample 12 - 17405 reads in 3275 unique sequences.
## Sample 13 - 12116 reads in 2994 unique sequences.
## Sample 14 - 12313 reads in 2674 unique sequences.
## Sample 15 - 12755 reads in 2500 unique sequences.
## Sample 16 - 851 reads in 355 unique sequences.
## Sample 17 - 350 reads in 166 unique sequences.
## Sample 18 - 8162 reads in 2544 unique sequences.
## Sample 19 - 16183 reads in 3350 unique sequences.
## Sample 20 - 13260 reads in 2798 unique sequences.
## Sample 21 - 12153 reads in 2281 unique sequences.
## Sample 22 - 13295 reads in 3378 unique sequences.
## Sample 23 - 2 reads in 2 unique sequences.
## Sample 24 - 13616 reads in 2738 unique sequences.
## Sample 25 - 1 reads in 1 unique sequences.
## Sample 26 - 1 reads in 1 unique sequences.
## Sample 27 - 8785 reads in 1485 unique sequences.
## Sample 28 - 36464 reads in 5234 unique sequences.
## Sample 29 - 8836 reads in 1441 unique sequences.
## Sample 30 - 1 reads in 1 unique sequences.
## Sample 31 - 7246 reads in 1382 unique sequences.
## Sample 32 - 9836 reads in 2149 unique sequences.
## Sample 33 - 14023 reads in 2901 unique sequences.
## Sample 34 - 14260 reads in 2254 unique sequences.
## Sample 35 - 11753 reads in 2637 unique sequences.
## Sample 36 - 12176 reads in 1857 unique sequences.
## Sample 37 - 11728 reads in 2604 unique sequences.
## Sample 38 - 10651 reads in 1602 unique sequences.
## Sample 39 - 12915 reads in 2309 unique sequences.
## Sample 40 - 16187 reads in 2961 unique sequences.
## Sample 41 - 17032 reads in 2412 unique sequences.
## Sample 42 - 13821 reads in 2377 unique sequences.
## Sample 43 - 15729 reads in 2963 unique sequences.
## Sample 44 - 19232 reads in 1811 unique sequences.
## Sample 45 - 12550 reads in 3128 unique sequences.
## Sample 46 - 10169 reads in 1877 unique sequences.
## Sample 47 - 6196 reads in 1527 unique sequences.
## Sample 48 - 10332 reads in 2508 unique sequences.
## Sample 49 - 9104 reads in 1528 unique sequences.
## Sample 50 - 11907 reads in 2284 unique sequences.
## Sample 51 - 715 reads in 191 unique sequences.
## Sample 52 - 11218 reads in 2264 unique sequences.
## Sample 53 - 4717 reads in 1183 unique sequences.
## Sample 54 - 4547 reads in 1069 unique sequences.
## Sample 55 - 1152 reads in 440 unique sequences.
## Sample 56 - 2834 reads in 1090 unique sequences.
## Sample 57 - 3400 reads in 1094 unique sequences.
## Sample 58 - 7362 reads in 1689 unique sequences.
## Sample 59 - 11939 reads in 2915 unique sequences.
## Sample 60 - 10457 reads in 2123 unique sequences.
## Sample 61 - 22653 reads in 2658 unique sequences.
## Sample 62 - 4 reads in 4 unique sequences.
## Sample 63 - 9282 reads in 1642 unique sequences.
## Sample 64 - 12630 reads in 1689 unique sequences.
## Sample 65 - 1 reads in 1 unique sequences.
## Sample 66 - 13905 reads in 2779 unique sequences.
## Sample 67 - 13693 reads in 3232 unique sequences.
## Sample 68 - 15342 reads in 1829 unique sequences.
## Sample 69 - 11288 reads in 1860 unique sequences.
## Sample 70 - 6412 reads in 1177 unique sequences.
## Sample 71 - 14325 reads in 2436 unique sequences.
## Sample 72 - 8964 reads in 1678 unique sequences.
## Sample 73 - 12999 reads in 2391 unique sequences.
## Sample 74 - 8254 reads in 1918 unique sequences.
## Sample 75 - 3377 reads in 921 unique sequences.
## Sample 76 - 15274 reads in 2373 unique sequences.
## Sample 77 - 14310 reads in 2981 unique sequences.
## Sample 78 - 12646 reads in 2371 unique sequences.
## Sample 79 - 11967 reads in 2142 unique sequences.
## Sample 80 - 8763 reads in 2587 unique sequences.
## Sample 81 - 9782 reads in 2773 unique sequences.
## Sample 82 - 13012 reads in 3801 unique sequences.
## Sample 83 - 5350 reads in 2176 unique sequences.
## Sample 84 - 5962 reads in 2165 unique sequences.
## Sample 85 - 10330 reads in 3472 unique sequences.
## Sample 86 - 6278 reads in 1942 unique sequences.
## Sample 87 - 11398 reads in 3675 unique sequences.
## Sample 88 - 9094 reads in 1906 unique sequences.
## Sample 89 - 10848 reads in 1706 unique sequences.
## Sample 90 - 11980 reads in 2437 unique sequences.
## Sample 91 - 23277 reads in 5129 unique sequences.
## Sample 92 - 7104 reads in 2379 unique sequences.
## Sample 93 - 18724 reads in 3223 unique sequences.
## Sample 94 - 13047 reads in 2230 unique sequences.
## Sample 95 - 21557 reads in 4109 unique sequences.
## Sample 96 - 7624 reads in 2315 unique sequences.
## Sample 97 - 8641 reads in 1583 unique sequences.
## Sample 98 - 8843 reads in 1829 unique sequences.
## Sample 99 - 19278 reads in 3136 unique sequences.
## Sample 100 - 8704 reads in 1142 unique sequences.
## Sample 101 - 4403 reads in 1078 unique sequences.
## Sample 102 - 6719 reads in 1269 unique sequences.
## Sample 103 - 4171 reads in 1042 unique sequences.
## Sample 104 - 11259 reads in 2242 unique sequences.
## Sample 105 - 19324 reads in 3890 unique sequences.
## Sample 106 - 22375 reads in 4267 unique sequences.
## Sample 107 - 7103 reads in 1435 unique sequences.
## Sample 108 - 12450 reads in 1913 unique sequences.
## Sample 109 - 22286 reads in 3427 unique sequences.
## Sample 110 - 15351 reads in 2576 unique sequences.
## Sample 111 - 13489 reads in 3942 unique sequences.
## Sample 112 - 11447 reads in 2186 unique sequences.
## Sample 113 - 21306 reads in 5257 unique sequences.
## Sample 114 - 11628 reads in 2240 unique sequences.
## Sample 115 - 344 reads in 232 unique sequences.
## Sample 116 - 11589 reads in 1936 unique sequences.
## Sample 117 - 8068 reads in 2555 unique sequences.
## Sample 118 - 7634 reads in 1150 unique sequences.
## Sample 119 - 13734 reads in 2073 unique sequences.
## Sample 120 - 19049 reads in 2733 unique sequences.
## Sample 121 - 25904 reads in 4817 unique sequences.
## Sample 122 - 23070 reads in 4831 unique sequences.
## Sample 123 - 11519 reads in 2293 unique sequences.
## Sample 124 - 12125 reads in 2107 unique sequences.
## Sample 125 - 6567 reads in 1172 unique sequences.
## Sample 126 - 8599 reads in 1672 unique sequences.
## Sample 127 - 10777 reads in 1976 unique sequences.
## Sample 128 - 8051 reads in 2442 unique sequences.
## Sample 129 - 27231 reads in 4932 unique sequences.
## Sample 130 - 6172 reads in 1573 unique sequences.
## Sample 131 - 15434 reads in 2922 unique sequences.
## Sample 132 - 7061 reads in 1641 unique sequences.
## Sample 133 - 6078 reads in 1258 unique sequences.
## Sample 134 - 7797 reads in 1724 unique sequences.
## Sample 135 - 16225 reads in 3752 unique sequences.
## Sample 136 - 19040 reads in 2852 unique sequences.
## Sample 137 - 20153 reads in 2300 unique sequences.
## Sample 138 - 6625 reads in 1942 unique sequences.
## Sample 139 - 15270 reads in 2128 unique sequences.
## Sample 140 - 8291 reads in 1651 unique sequences.
## Sample 141 - 15599 reads in 2620 unique sequences.
## Sample 142 - 9402 reads in 1501 unique sequences.
## Sample 143 - 18214 reads in 3433 unique sequences.
## Sample 144 - 4 reads in 3 unique sequences.
## Sample 145 - 10825 reads in 1932 unique sequences.
## Sample 146 - 5494 reads in 1389 unique sequences.
## Sample 147 - 6774 reads in 1199 unique sequences.
## Sample 148 - 9593 reads in 2431 unique sequences.
## Sample 149 - 9878 reads in 2390 unique sequences.
## Sample 150 - 8208 reads in 1476 unique sequences.
## Sample 151 - 11754 reads in 3025 unique sequences.
## Sample 152 - 14258 reads in 2148 unique sequences.
## Sample 153 - 13163 reads in 1728 unique sequences.
## Sample 154 - 12164 reads in 2815 unique sequences.
## Sample 155 - 5238 reads in 2005 unique sequences.
## Sample 156 - 6285 reads in 2092 unique sequences.
## Sample 157 - 10184 reads in 2397 unique sequences.
## Sample 158 - 7157 reads in 1236 unique sequences.
## Sample 159 - 12665 reads in 1739 unique sequences.
## Sample 160 - 12895 reads in 1893 unique sequences.
## Sample 161 - 6987 reads in 1793 unique sequences.
## Sample 162 - 5489 reads in 1522 unique sequences.
## Sample 163 - 5605 reads in 1124 unique sequences.
## Sample 164 - 6740 reads in 1468 unique sequences.
## Sample 165 - 8707 reads in 1534 unique sequences.
## Sample 166 - 10410 reads in 1691 unique sequences.
## Sample 167 - 13359 reads in 2813 unique sequences.
## Sample 168 - 20553 reads in 4425 unique sequences.
## Sample 169 - 18623 reads in 3192 unique sequences.
## Sample 170 - 149 reads in 36 unique sequences.
## Sample 171 - 4554 reads in 1268 unique sequences.
## Sample 172 - 9976 reads in 2803 unique sequences.
## Sample 173 - 7833 reads in 1046 unique sequences.
## Sample 174 - 9116 reads in 1588 unique sequences.
## Sample 175 - 3 reads in 2 unique sequences.
## Sample 176 - 3 reads in 3 unique sequences.
## Sample 177 - 1 reads in 1 unique sequences.
## Sample 178 - 37 reads in 26 unique sequences.
## Sample 179 - 2165 reads in 518 unique sequences.
## Sample 180 - 2878 reads in 686 unique sequences.
## Sample 181 - 622 reads in 151 unique sequences.
## Sample 182 - 823 reads in 198 unique sequences.
## Sample 183 - 6313 reads in 1232 unique sequences.
## Sample 184 - 1 reads in 1 unique sequences.
## Sample 185 - 6 reads in 6 unique sequences.
## Sample 186 - 1 reads in 1 unique sequences.
## Sample 187 - 1 reads in 1 unique sequences.
## Sample 188 - 180 reads in 43 unique sequences.
## Sample 189 - 8407 reads in 2322 unique sequences.
## Sample 190 - 7425 reads in 1596 unique sequences.
## Sample 191 - 941 reads in 286 unique sequences.
## Sample 192 - 16853 reads in 2717 unique sequences.
## Sample 193 - 1057 reads in 254 unique sequences.
## Sample 194 - 991 reads in 179 unique sequences.
## Sample 195 - 1345 reads in 233 unique sequences.
## Sample 196 - 10060 reads in 1254 unique sequences.
## Sample 197 - 18665 reads in 2376 unique sequences.
## Sample 198 - 11780 reads in 1976 unique sequences.
## Sample 199 - 20059 reads in 2328 unique sequences.
## Sample 200 - 13590 reads in 2956 unique sequences.
## Sample 201 - 10904 reads in 1553 unique sequences.
## Sample 202 - 10331 reads in 1712 unique sequences.
## Sample 203 - 1762 reads in 452 unique sequences.
## Sample 204 - 10072 reads in 2012 unique sequences.
## Sample 205 - 21450 reads in 4607 unique sequences.
## Sample 206 - 10763 reads in 1797 unique sequences.
## Sample 207 - 10490 reads in 2508 unique sequences.
## Sample 208 - 13943 reads in 2725 unique sequences.
## Sample 209 - 3156 reads in 1076 unique sequences.
## Sample 210 - 15382 reads in 2738 unique sequences.
## Sample 211 - 27268 reads in 4653 unique sequences.
## Sample 212 - 19126 reads in 3951 unique sequences.
## Sample 213 - 8259 reads in 2418 unique sequences.
## Sample 214 - 7382 reads in 2021 unique sequences.
## Sample 215 - 12356 reads in 2582 unique sequences.
## Sample 216 - 9986 reads in 1500 unique sequences.
## Sample 217 - 18064 reads in 4242 unique sequences.
## Sample 218 - 9654 reads in 2102 unique sequences.
## Sample 219 - 14566 reads in 1968 unique sequences.
## Sample 220 - 23716 reads in 4120 unique sequences.
## Sample 221 - 12367 reads in 3134 unique sequences.
## Sample 222 - 2911 reads in 685 unique sequences.
## Sample 223 - 8200 reads in 2247 unique sequences.
## Sample 224 - 11029 reads in 1547 unique sequences.
## Sample 225 - 15454 reads in 3266 unique sequences.
## Sample 226 - 9704 reads in 1554 unique sequences.
## Sample 227 - 13922 reads in 2935 unique sequences.
## Sample 228 - 12161 reads in 3025 unique sequences.
## Sample 229 - 5407 reads in 1672 unique sequences.
## Sample 230 - 23791 reads in 4225 unique sequences.
## Sample 231 - 14833 reads in 2633 unique sequences.
## Sample 232 - 9069 reads in 2234 unique sequences.
## Sample 233 - 26403 reads in 5437 unique sequences.
## Sample 234 - 14649 reads in 2285 unique sequences.
## Sample 235 - 1481 reads in 413 unique sequences.
## Sample 236 - 896 reads in 294 unique sequences.
## Sample 237 - 977 reads in 506 unique sequences.
## Sample 238 - 2444 reads in 599 unique sequences.
## Sample 239 - 462 reads in 111 unique sequences.
## Sample 240 - 8900 reads in 2234 unique sequences.
## Sample 241 - 43 reads in 21 unique sequences.
## Sample 242 - 1 reads in 1 unique sequences.
## Sample 243 - 3 reads in 3 unique sequences.
## Sample 244 - 18374 reads in 3186 unique sequences.
## Sample 245 - 13370 reads in 2303 unique sequences.
## Sample 246 - 18429 reads in 3542 unique sequences.
# Sample inference on the reverse reads: same call as for the forward reads,
# but with the dereplicated reverse samples and the reverse error model (errR)
dadaRs <- dada(derep = derepRs, err = errR, multithread = TRUE)
## Sample 1 - 9241 reads in 3205 unique sequences.
## Sample 2 - 5929 reads in 3022 unique sequences.
## Sample 3 - 850 reads in 397 unique sequences.
## Sample 4 - 4559 reads in 2719 unique sequences.
## Sample 5 - 2027 reads in 930 unique sequences.
## Sample 6 - 15437 reads in 4895 unique sequences.
## Sample 7 - 4021 reads in 2167 unique sequences.
## Sample 8 - 8636 reads in 3009 unique sequences.
## Sample 9 - 4778 reads in 1876 unique sequences.
## Sample 10 - 15258 reads in 5820 unique sequences.
## Sample 11 - 17366 reads in 7596 unique sequences.
## Sample 12 - 17405 reads in 6427 unique sequences.
## Sample 13 - 12116 reads in 4939 unique sequences.
## Sample 14 - 12313 reads in 4950 unique sequences.
## Sample 15 - 12755 reads in 4654 unique sequences.
## Sample 16 - 851 reads in 517 unique sequences.
## Sample 17 - 350 reads in 234 unique sequences.
## Sample 18 - 8162 reads in 4123 unique sequences.
## Sample 19 - 16183 reads in 5848 unique sequences.
## Sample 20 - 13260 reads in 5067 unique sequences.
## Sample 21 - 12153 reads in 4305 unique sequences.
## Sample 22 - 13295 reads in 5534 unique sequences.
## Sample 23 - 2 reads in 2 unique sequences.
## Sample 24 - 13616 reads in 4682 unique sequences.
## Sample 25 - 1 reads in 1 unique sequences.
## Sample 26 - 1 reads in 1 unique sequences.
## Sample 27 - 8785 reads in 3038 unique sequences.
## Sample 28 - 36464 reads in 11402 unique sequences.
## Sample 29 - 8836 reads in 2804 unique sequences.
## Sample 30 - 1 reads in 1 unique sequences.
## Sample 31 - 7246 reads in 2495 unique sequences.
## Sample 32 - 9836 reads in 4020 unique sequences.
## Sample 33 - 14023 reads in 5076 unique sequences.
## Sample 34 - 14260 reads in 4494 unique sequences.
## Sample 35 - 11753 reads in 4657 unique sequences.
## Sample 36 - 12176 reads in 3597 unique sequences.
## Sample 37 - 11728 reads in 4682 unique sequences.
## Sample 38 - 10651 reads in 3150 unique sequences.
## Sample 39 - 12915 reads in 4269 unique sequences.
## Sample 40 - 16187 reads in 5422 unique sequences.
## Sample 41 - 17032 reads in 5073 unique sequences.
## Sample 42 - 13821 reads in 4537 unique sequences.
## Sample 43 - 15729 reads in 5712 unique sequences.
## Sample 44 - 19232 reads in 4554 unique sequences.
## Sample 45 - 12550 reads in 5336 unique sequences.
## Sample 46 - 10169 reads in 3726 unique sequences.
## Sample 47 - 6196 reads in 2640 unique sequences.
## Sample 48 - 10332 reads in 4159 unique sequences.
## Sample 49 - 9104 reads in 3139 unique sequences.
## Sample 50 - 11907 reads in 4338 unique sequences.
## Sample 51 - 715 reads in 329 unique sequences.
## Sample 52 - 11218 reads in 4133 unique sequences.
## Sample 53 - 4717 reads in 2153 unique sequences.
## Sample 54 - 4547 reads in 1819 unique sequences.
## Sample 55 - 1152 reads in 674 unique sequences.
## Sample 56 - 2834 reads in 1686 unique sequences.
## Sample 57 - 3400 reads in 1664 unique sequences.
## Sample 58 - 7362 reads in 3037 unique sequences.
## Sample 59 - 11939 reads in 4853 unique sequences.
## Sample 60 - 10457 reads in 3897 unique sequences.
## Sample 61 - 22653 reads in 5895 unique sequences.
## Sample 62 - 4 reads in 4 unique sequences.
## Sample 63 - 9282 reads in 3372 unique sequences.
## Sample 64 - 12630 reads in 3592 unique sequences.
## Sample 65 - 1 reads in 1 unique sequences.
## Sample 66 - 13905 reads in 4992 unique sequences.
## Sample 67 - 13693 reads in 5478 unique sequences.
## Sample 68 - 15342 reads in 4123 unique sequences.
## Sample 69 - 11288 reads in 3581 unique sequences.
## Sample 70 - 6412 reads in 2311 unique sequences.
## Sample 71 - 14325 reads in 4893 unique sequences.
## Sample 72 - 8964 reads in 3148 unique sequences.
## Sample 73 - 12999 reads in 4394 unique sequences.
## Sample 74 - 8254 reads in 3207 unique sequences.
## Sample 75 - 3377 reads in 1387 unique sequences.
## Sample 76 - 15274 reads in 4315 unique sequences.
## Sample 77 - 14310 reads in 5203 unique sequences.
## Sample 78 - 12646 reads in 4089 unique sequences.
## Sample 79 - 11967 reads in 3959 unique sequences.
## Sample 80 - 8763 reads in 3968 unique sequences.
## Sample 81 - 9782 reads in 4212 unique sequences.
## Sample 82 - 13012 reads in 5800 unique sequences.
## Sample 83 - 5350 reads in 2997 unique sequences.
## Sample 84 - 5962 reads in 3006 unique sequences.
## Sample 85 - 10330 reads in 4938 unique sequences.
## Sample 86 - 6278 reads in 2935 unique sequences.
## Sample 87 - 11398 reads in 5351 unique sequences.
## Sample 88 - 9094 reads in 3532 unique sequences.
## Sample 89 - 10848 reads in 3028 unique sequences.
## Sample 90 - 11980 reads in 4611 unique sequences.
## Sample 91 - 23277 reads in 8967 unique sequences.
## Sample 92 - 7104 reads in 3559 unique sequences.
## Sample 93 - 18724 reads in 6250 unique sequences.
## Sample 94 - 13047 reads in 4292 unique sequences.
## Sample 95 - 21557 reads in 7708 unique sequences.
## Sample 96 - 7624 reads in 3855 unique sequences.
## Sample 97 - 8641 reads in 3019 unique sequences.
## Sample 98 - 8843 reads in 3345 unique sequences.
## Sample 99 - 19278 reads in 6103 unique sequences.
## Sample 100 - 8704 reads in 2558 unique sequences.
## Sample 101 - 4403 reads in 1984 unique sequences.
## Sample 102 - 6719 reads in 2405 unique sequences.
## Sample 103 - 4171 reads in 1796 unique sequences.
## Sample 104 - 11259 reads in 4222 unique sequences.
## Sample 105 - 19324 reads in 7038 unique sequences.
## Sample 106 - 22375 reads in 7783 unique sequences.
## Sample 107 - 7103 reads in 2745 unique sequences.
## Sample 108 - 12450 reads in 3779 unique sequences.
## Sample 109 - 22286 reads in 7183 unique sequences.
## Sample 110 - 15351 reads in 4948 unique sequences.
## Sample 111 - 13489 reads in 6095 unique sequences.
## Sample 112 - 11447 reads in 4031 unique sequences.
## Sample 113 - 21306 reads in 9038 unique sequences.
## Sample 114 - 11628 reads in 4366 unique sequences.
## Sample 115 - 344 reads in 269 unique sequences.
## Sample 116 - 11589 reads in 3906 unique sequences.
## Sample 117 - 8068 reads in 3979 unique sequences.
## Sample 118 - 7634 reads in 2369 unique sequences.
## Sample 119 - 13734 reads in 4114 unique sequences.
## Sample 120 - 19049 reads in 5382 unique sequences.
## Sample 121 - 25904 reads in 8722 unique sequences.
## Sample 122 - 23070 reads in 8655 unique sequences.
## Sample 123 - 11519 reads in 4170 unique sequences.
## Sample 124 - 12125 reads in 4062 unique sequences.
## Sample 125 - 6567 reads in 2318 unique sequences.
## Sample 126 - 8599 reads in 3125 unique sequences.
## Sample 127 - 10777 reads in 3905 unique sequences.
## Sample 128 - 8051 reads in 4056 unique sequences.
## Sample 129 - 27231 reads in 9202 unique sequences.
## Sample 130 - 6172 reads in 2889 unique sequences.
## Sample 131 - 15434 reads in 5348 unique sequences.
## Sample 132 - 7061 reads in 2924 unique sequences.
## Sample 133 - 6078 reads in 2203 unique sequences.
## Sample 134 - 7797 reads in 3042 unique sequences.
## Sample 135 - 16225 reads in 6408 unique sequences.
## Sample 136 - 19040 reads in 5803 unique sequences.
## Sample 137 - 20153 reads in 5081 unique sequences.
## Sample 138 - 6625 reads in 3200 unique sequences.
## Sample 139 - 15270 reads in 4318 unique sequences.
## Sample 140 - 8291 reads in 2922 unique sequences.
## Sample 141 - 15599 reads in 4733 unique sequences.
## Sample 142 - 9402 reads in 3079 unique sequences.
## Sample 143 - 18214 reads in 6606 unique sequences.
## Sample 144 - 4 reads in 3 unique sequences.
## Sample 145 - 10825 reads in 3745 unique sequences.
## Sample 146 - 5494 reads in 2303 unique sequences.
## Sample 147 - 6774 reads in 2221 unique sequences.
## Sample 148 - 9593 reads in 4217 unique sequences.
## Sample 149 - 9878 reads in 4455 unique sequences.
## Sample 150 - 8208 reads in 2797 unique sequences.
## Sample 151 - 11754 reads in 5003 unique sequences.
## Sample 152 - 14258 reads in 4574 unique sequences.
## Sample 153 - 13163 reads in 4175 unique sequences.
## Sample 154 - 12164 reads in 4859 unique sequences.
## Sample 155 - 5238 reads in 2931 unique sequences.
## Sample 156 - 6285 reads in 3208 unique sequences.
## Sample 157 - 10184 reads in 4042 unique sequences.
## Sample 158 - 7157 reads in 2512 unique sequences.
## Sample 159 - 12665 reads in 3832 unique sequences.
## Sample 160 - 12895 reads in 3482 unique sequences.
## Sample 161 - 6987 reads in 3024 unique sequences.
## Sample 162 - 5489 reads in 2581 unique sequences.
## Sample 163 - 5605 reads in 2079 unique sequences.
## Sample 164 - 6740 reads in 2742 unique sequences.
## Sample 165 - 8707 reads in 3018 unique sequences.
## Sample 166 - 10410 reads in 3309 unique sequences.
## Sample 167 - 13359 reads in 4944 unique sequences.
## Sample 168 - 20553 reads in 7625 unique sequences.
## Sample 169 - 18623 reads in 6359 unique sequences.
## Sample 170 - 149 reads in 62 unique sequences.
## Sample 171 - 4554 reads in 2038 unique sequences.
## Sample 172 - 9976 reads in 4492 unique sequences.
## Sample 173 - 7833 reads in 2291 unique sequences.
## Sample 174 - 9116 reads in 3182 unique sequences.
## Sample 175 - 3 reads in 1 unique sequences.
## Sample 176 - 3 reads in 3 unique sequences.
## Sample 177 - 1 reads in 1 unique sequences.
## Sample 178 - 37 reads in 25 unique sequences.
## Sample 179 - 2165 reads in 969 unique sequences.
## Sample 180 - 2878 reads in 1253 unique sequences.
## Sample 181 - 622 reads in 281 unique sequences.
## Sample 182 - 823 reads in 411 unique sequences.
## Sample 183 - 6313 reads in 2420 unique sequences.
## Sample 184 - 1 reads in 1 unique sequences.
## Sample 185 - 6 reads in 6 unique sequences.
## Sample 186 - 1 reads in 1 unique sequences.
## Sample 187 - 1 reads in 1 unique sequences.
## Sample 188 - 180 reads in 84 unique sequences.
## Sample 189 - 8407 reads in 3794 unique sequences.
## Sample 190 - 7425 reads in 3080 unique sequences.
## Sample 191 - 941 reads in 528 unique sequences.
## Sample 192 - 16853 reads in 5511 unique sequences.
## Sample 193 - 1057 reads in 534 unique sequences.
## Sample 194 - 991 reads in 368 unique sequences.
## Sample 195 - 1345 reads in 536 unique sequences.
## Sample 196 - 10060 reads in 2765 unique sequences.
## Sample 197 - 18665 reads in 5254 unique sequences.
## Sample 198 - 11780 reads in 3729 unique sequences.
## Sample 199 - 20059 reads in 5068 unique sequences.
## Sample 200 - 13590 reads in 5117 unique sequences.
## Sample 201 - 10904 reads in 3283 unique sequences.
## Sample 202 - 10331 reads in 3548 unique sequences.
## Sample 203 - 1762 reads in 773 unique sequences.
## Sample 204 - 10072 reads in 3781 unique sequences.
## Sample 205 - 21450 reads in 8283 unique sequences.
## Sample 206 - 10763 reads in 3769 unique sequences.
## Sample 207 - 10490 reads in 4373 unique sequences.
## Sample 208 - 13943 reads in 5095 unique sequences.
## Sample 209 - 3156 reads in 1536 unique sequences.
## Sample 210 - 15382 reads in 4967 unique sequences.
## Sample 211 - 27268 reads in 9150 unique sequences.
## Sample 212 - 19126 reads in 7054 unique sequences.
## Sample 213 - 8259 reads in 3652 unique sequences.
## Sample 214 - 7382 reads in 3258 unique sequences.
## Sample 215 - 12356 reads in 4721 unique sequences.
## Sample 216 - 9986 reads in 2768 unique sequences.
## Sample 217 - 18064 reads in 7078 unique sequences.
## Sample 218 - 9654 reads in 3738 unique sequences.
## Sample 219 - 14566 reads in 3915 unique sequences.
## Sample 220 - 23716 reads in 7435 unique sequences.
## Sample 221 - 12367 reads in 4685 unique sequences.
## Sample 222 - 2911 reads in 1181 unique sequences.
## Sample 223 - 8200 reads in 3655 unique sequences.
## Sample 224 - 11029 reads in 3144 unique sequences.
## Sample 225 - 15454 reads in 5684 unique sequences.
## Sample 226 - 9704 reads in 2821 unique sequences.
## Sample 227 - 13922 reads in 5510 unique sequences.
## Sample 228 - 12161 reads in 5029 unique sequences.
## Sample 229 - 5407 reads in 2729 unique sequences.
## Sample 230 - 23791 reads in 8228 unique sequences.
## Sample 231 - 14833 reads in 4999 unique sequences.
## Sample 232 - 9069 reads in 3663 unique sequences.
## Sample 233 - 26403 reads in 8791 unique sequences.
## Sample 234 - 14649 reads in 4608 unique sequences.
## Sample 235 - 1481 reads in 673 unique sequences.
## Sample 236 - 896 reads in 469 unique sequences.
## Sample 237 - 977 reads in 688 unique sequences.
## Sample 238 - 2444 reads in 1046 unique sequences.
## Sample 239 - 462 reads in 194 unique sequences.
## Sample 240 - 8900 reads in 3841 unique sequences.
## Sample 241 - 43 reads in 21 unique sequences.
## Sample 242 - 1 reads in 1 unique sequences.
## Sample 243 - 3 reads in 2 unique sequences.
## Sample 244 - 18374 reads in 6199 unique sequences.
## Sample 245 - 13370 reads in 4800 unique sequences.
## Sample 246 - 18429 reads in 6859 unique sequences.
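We’ve inferred the sample sequences in the forward and reverse reads independently. Now it’s time to merge those inferred sequences together, throwing out those pairs of reads that don’t match (with the settings used below, a pair is discarded if the forward and reverse reads overlap by fewer than 11 nucleotides or if the overlap contains any mismatch).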
# Merge the denoised forward and reverse reads of every sample. A pair is kept
# only if the two reads overlap by at least 11 nt with no mismatches in the
# overlap; verbose = TRUE prints the per-sample merge summary shown below.
mergers <- mergePairs(
  dadaF = dadaFs, derepF = derepFs,
  dadaR = dadaRs, derepR = derepRs,
  minOverlap = 11, maxMismatch = 0,
  verbose = TRUE
)
## 8965 paired-reads (in 48 unique pairings) successfully merged out of 9165 (in 107 pairings) input.
## 4858 paired-reads (in 187 unique pairings) successfully merged out of 5713 (in 466 pairings) input.
## 761 paired-reads (in 7 unique pairings) successfully merged out of 823 (in 19 pairings) input.
## 2982 paired-reads (in 181 unique pairings) successfully merged out of 4385 (in 632 pairings) input.
## 1830 paired-reads (in 20 unique pairings) successfully merged out of 1959 (in 50 pairings) input.
## 14791 paired-reads (in 103 unique pairings) successfully merged out of 15299 (in 195 pairings) input.
## 3247 paired-reads (in 104 unique pairings) successfully merged out of 3893 (in 313 pairings) input.
## 8339 paired-reads (in 100 unique pairings) successfully merged out of 8558 (in 177 pairings) input.
## 4320 paired-reads (in 55 unique pairings) successfully merged out of 4728 (in 138 pairings) input.
## 13957 paired-reads (in 195 unique pairings) successfully merged out of 15051 (in 513 pairings) input.
## 15540 paired-reads (in 498 unique pairings) successfully merged out of 17073 (in 1021 pairings) input.
## 16216 paired-reads (in 254 unique pairings) successfully merged out of 17250 (in 507 pairings) input.
## 11003 paired-reads (in 197 unique pairings) successfully merged out of 11996 (in 506 pairings) input.
## 11620 paired-reads (in 250 unique pairings) successfully merged out of 12204 (in 487 pairings) input.
## 12018 paired-reads (in 173 unique pairings) successfully merged out of 12654 (in 382 pairings) input.
## 567 paired-reads (in 16 unique pairings) successfully merged out of 820 (in 65 pairings) input.
## 284 paired-reads (in 10 unique pairings) successfully merged out of 322 (in 20 pairings) input.
## 7061 paired-reads (in 395 unique pairings) successfully merged out of 7910 (in 739 pairings) input.
## 15185 paired-reads (in 202 unique pairings) successfully merged out of 15982 (in 399 pairings) input.
## 12306 paired-reads (in 246 unique pairings) successfully merged out of 13099 (in 519 pairings) input.
## 11510 paired-reads (in 86 unique pairings) successfully merged out of 12041 (in 186 pairings) input.
## 11719 paired-reads (in 267 unique pairings) successfully merged out of 13065 (in 670 pairings) input.
## No paired-reads (in ZERO unique pairings) successfully merged out of 2 pairings) input.
## 12827 paired-reads (in 139 unique pairings) successfully merged out of 13483 (in 337 pairings) input.
## 1 paired-reads (in 1 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 8601 paired-reads (in 32 unique pairings) successfully merged out of 8731 (in 88 pairings) input.
## 35105 paired-reads (in 324 unique pairings) successfully merged out of 36129 (in 626 pairings) input.
## 8555 paired-reads (in 43 unique pairings) successfully merged out of 8740 (in 87 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 6958 paired-reads (in 61 unique pairings) successfully merged out of 7189 (in 125 pairings) input.
## 8993 paired-reads (in 163 unique pairings) successfully merged out of 9714 (in 395 pairings) input.
## 13210 paired-reads (in 158 unique pairings) successfully merged out of 13921 (in 338 pairings) input.
## 13769 paired-reads (in 104 unique pairings) successfully merged out of 14142 (in 211 pairings) input.
## 10503 paired-reads (in 243 unique pairings) successfully merged out of 11541 (in 537 pairings) input.
## 11736 paired-reads (in 42 unique pairings) successfully merged out of 12053 (in 129 pairings) input.
## 10687 paired-reads (in 179 unique pairings) successfully merged out of 11514 (in 431 pairings) input.
## 10452 paired-reads (in 63 unique pairings) successfully merged out of 10585 (in 96 pairings) input.
## 12426 paired-reads (in 170 unique pairings) successfully merged out of 12808 (in 284 pairings) input.
## 15566 paired-reads (in 197 unique pairings) successfully merged out of 16031 (in 376 pairings) input.
## 16581 paired-reads (in 116 unique pairings) successfully merged out of 16889 (in 214 pairings) input.
## 13193 paired-reads (in 144 unique pairings) successfully merged out of 13645 (in 266 pairings) input.
## 15055 paired-reads (in 275 unique pairings) successfully merged out of 15531 (in 444 pairings) input.
## 19176 paired-reads (in 5 unique pairings) successfully merged out of 19189 (in 9 pairings) input.
## 11605 paired-reads (in 337 unique pairings) successfully merged out of 12302 (in 659 pairings) input.
## 9643 paired-reads (in 163 unique pairings) successfully merged out of 9965 (in 268 pairings) input.
## 5787 paired-reads (in 154 unique pairings) successfully merged out of 6106 (in 274 pairings) input.
## 9722 paired-reads (in 209 unique pairings) successfully merged out of 10195 (in 400 pairings) input.
## 8741 paired-reads (in 55 unique pairings) successfully merged out of 9031 (in 136 pairings) input.
## 11174 paired-reads (in 181 unique pairings) successfully merged out of 11718 (in 347 pairings) input.
## 664 paired-reads (in 6 unique pairings) successfully merged out of 691 (in 12 pairings) input.
## 10227 paired-reads (in 144 unique pairings) successfully merged out of 11074 (in 365 pairings) input.
## 4340 paired-reads (in 83 unique pairings) successfully merged out of 4621 (in 179 pairings) input.
## 4188 paired-reads (in 53 unique pairings) successfully merged out of 4488 (in 131 pairings) input.
## 920 paired-reads (in 45 unique pairings) successfully merged out of 1127 (in 98 pairings) input.
## 1828 paired-reads (in 87 unique pairings) successfully merged out of 2688 (in 322 pairings) input.
## 2883 paired-reads (in 93 unique pairings) successfully merged out of 3285 (in 200 pairings) input.
## 6788 paired-reads (in 112 unique pairings) successfully merged out of 7181 (in 254 pairings) input.
## 10565 paired-reads (in 292 unique pairings) successfully merged out of 11766 (in 649 pairings) input.
## 9631 paired-reads (in 109 unique pairings) successfully merged out of 10312 (in 279 pairings) input.
## 22525 paired-reads (in 48 unique pairings) successfully merged out of 22615 (in 76 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 3 (in 1 pairings) input.
## 8856 paired-reads (in 33 unique pairings) successfully merged out of 9167 (in 80 pairings) input.
## 12409 paired-reads (in 65 unique pairings) successfully merged out of 12571 (in 105 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 13142 paired-reads (in 193 unique pairings) successfully merged out of 13746 (in 372 pairings) input.
## 12548 paired-reads (in 238 unique pairings) successfully merged out of 13365 (in 556 pairings) input.
## 15258 paired-reads (in 28 unique pairings) successfully merged out of 15294 (in 40 pairings) input.
## 10998 paired-reads (in 55 unique pairings) successfully merged out of 11174 (in 102 pairings) input.
## 6096 paired-reads (in 56 unique pairings) successfully merged out of 6328 (in 124 pairings) input.
## 13614 paired-reads (in 142 unique pairings) successfully merged out of 14241 (in 294 pairings) input.
## 8521 paired-reads (in 50 unique pairings) successfully merged out of 8883 (in 131 pairings) input.
## 12552 paired-reads (in 157 unique pairings) successfully merged out of 12883 (in 299 pairings) input.
## 7644 paired-reads (in 139 unique pairings) successfully merged out of 8190 (in 324 pairings) input.
## 3164 paired-reads (in 37 unique pairings) successfully merged out of 3324 (in 78 pairings) input.
## 14940 paired-reads (in 35 unique pairings) successfully merged out of 15170 (in 85 pairings) input.
## 13267 paired-reads (in 286 unique pairings) successfully merged out of 14165 (in 571 pairings) input.
## 12100 paired-reads (in 136 unique pairings) successfully merged out of 12553 (in 251 pairings) input.
## 11626 paired-reads (in 65 unique pairings) successfully merged out of 11844 (in 138 pairings) input.
## 7464 paired-reads (in 172 unique pairings) successfully merged out of 8422 (in 490 pairings) input.
## 8716 paired-reads (in 210 unique pairings) successfully merged out of 9594 (in 503 pairings) input.
## 11406 paired-reads (in 314 unique pairings) successfully merged out of 12672 (in 746 pairings) input.
## 4293 paired-reads (in 179 unique pairings) successfully merged out of 5111 (in 506 pairings) input.
## 5019 paired-reads (in 153 unique pairings) successfully merged out of 5771 (in 468 pairings) input.
## 8600 paired-reads (in 294 unique pairings) successfully merged out of 9938 (in 755 pairings) input.
## 5413 paired-reads (in 117 unique pairings) successfully merged out of 6051 (in 302 pairings) input.
## 9709 paired-reads (in 274 unique pairings) successfully merged out of 10928 (in 713 pairings) input.
## 8514 paired-reads (in 92 unique pairings) successfully merged out of 8877 (in 238 pairings) input.
## 10661 paired-reads (in 27 unique pairings) successfully merged out of 10800 (in 53 pairings) input.
## 10641 paired-reads (in 162 unique pairings) successfully merged out of 11847 (in 408 pairings) input.
## 21511 paired-reads (in 517 unique pairings) successfully merged out of 22922 (in 1120 pairings) input.
## 5749 paired-reads (in 199 unique pairings) successfully merged out of 6853 (in 545 pairings) input.
## 17494 paired-reads (in 239 unique pairings) successfully merged out of 18496 (in 496 pairings) input.
## 12124 paired-reads (in 138 unique pairings) successfully merged out of 12931 (in 275 pairings) input.
## 20032 paired-reads (in 307 unique pairings) successfully merged out of 21250 (in 699 pairings) input.
## 6429 paired-reads (in 258 unique pairings) successfully merged out of 7377 (in 556 pairings) input.
## 8191 paired-reads (in 72 unique pairings) successfully merged out of 8530 (in 132 pairings) input.
## 8426 paired-reads (in 120 unique pairings) successfully merged out of 8768 (in 242 pairings) input.
## 18638 paired-reads (in 258 unique pairings) successfully merged out of 19048 (in 416 pairings) input.
## 8604 paired-reads (in 20 unique pairings) successfully merged out of 8658 (in 32 pairings) input.
## 4010 paired-reads (in 92 unique pairings) successfully merged out of 4312 (in 195 pairings) input.
## 6483 paired-reads (in 50 unique pairings) successfully merged out of 6634 (in 93 pairings) input.
## 3999 paired-reads (in 94 unique pairings) successfully merged out of 4121 (in 155 pairings) input.
## 10538 paired-reads (in 135 unique pairings) successfully merged out of 11119 (in 311 pairings) input.
## 18240 paired-reads (in 299 unique pairings) successfully merged out of 18965 (in 515 pairings) input.
## 21192 paired-reads (in 324 unique pairings) successfully merged out of 22038 (in 608 pairings) input.
## 6815 paired-reads (in 78 unique pairings) successfully merged out of 7040 (in 156 pairings) input.
## 12219 paired-reads (in 86 unique pairings) successfully merged out of 12398 (in 140 pairings) input.
## 21478 paired-reads (in 207 unique pairings) successfully merged out of 22113 (in 395 pairings) input.
## 14683 paired-reads (in 50 unique pairings) successfully merged out of 14849 (in 83 pairings) input.
## 11651 paired-reads (in 353 unique pairings) successfully merged out of 13002 (in 890 pairings) input.
## 10672 paired-reads (in 61 unique pairings) successfully merged out of 10900 (in 96 pairings) input.
## 17814 paired-reads (in 602 unique pairings) successfully merged out of 20739 (in 1627 pairings) input.
## 11203 paired-reads (in 63 unique pairings) successfully merged out of 11506 (in 154 pairings) input.
## 173 paired-reads (in 6 unique pairings) successfully merged out of 272 (in 30 pairings) input.
## 11335 paired-reads (in 26 unique pairings) successfully merged out of 11506 (in 60 pairings) input.
## 7025 paired-reads (in 87 unique pairings) successfully merged out of 7793 (in 386 pairings) input.
## 7459 paired-reads (in 24 unique pairings) successfully merged out of 7576 (in 60 pairings) input.
## 13454 paired-reads (in 57 unique pairings) successfully merged out of 13656 (in 114 pairings) input.
## 18691 paired-reads (in 65 unique pairings) successfully merged out of 18944 (in 125 pairings) input.
## 24659 paired-reads (in 241 unique pairings) successfully merged out of 25602 (in 569 pairings) input.
## 21810 paired-reads (in 349 unique pairings) successfully merged out of 22708 (in 681 pairings) input.
## 10926 paired-reads (in 115 unique pairings) successfully merged out of 11371 (in 270 pairings) input.
## 11479 paired-reads (in 64 unique pairings) successfully merged out of 11961 (in 186 pairings) input.
## 6219 paired-reads (in 45 unique pairings) successfully merged out of 6501 (in 108 pairings) input.
## 7897 paired-reads (in 82 unique pairings) successfully merged out of 8511 (in 209 pairings) input.
## 9948 paired-reads (in 93 unique pairings) successfully merged out of 10621 (in 204 pairings) input.
## 6730 paired-reads (in 116 unique pairings) successfully merged out of 7762 (in 508 pairings) input.
## 25706 paired-reads (in 262 unique pairings) successfully merged out of 26991 (in 637 pairings) input.
## 5640 paired-reads (in 47 unique pairings) successfully merged out of 6001 (in 188 pairings) input.
## 14739 paired-reads (in 132 unique pairings) successfully merged out of 15299 (in 340 pairings) input.
## 6461 paired-reads (in 78 unique pairings) successfully merged out of 6904 (in 191 pairings) input.
## 5604 paired-reads (in 28 unique pairings) successfully merged out of 5921 (in 88 pairings) input.
## 7120 paired-reads (in 78 unique pairings) successfully merged out of 7676 (in 210 pairings) input.
## 14390 paired-reads (in 255 unique pairings) successfully merged out of 15979 (in 756 pairings) input.
## 18257 paired-reads (in 100 unique pairings) successfully merged out of 18916 (in 290 pairings) input.
## 19904 paired-reads (in 14 unique pairings) successfully merged out of 20094 (in 43 pairings) input.
## 5941 paired-reads (in 51 unique pairings) successfully merged out of 6423 (in 259 pairings) input.
## 14903 paired-reads (in 56 unique pairings) successfully merged out of 15165 (in 133 pairings) input.
## 7803 paired-reads (in 86 unique pairings) successfully merged out of 8190 (in 201 pairings) input.
## 15334 paired-reads (in 36 unique pairings) successfully merged out of 15512 (in 93 pairings) input.
## 8909 paired-reads (in 36 unique pairings) successfully merged out of 9239 (in 109 pairings) input.
## 17277 paired-reads (in 102 unique pairings) successfully merged out of 18002 (in 321 pairings) input.
## 2 paired-reads (in 1 unique pairings) successfully merged out of 2 (in 1 pairings) input.
## 10408 paired-reads (in 84 unique pairings) successfully merged out of 10779 (in 210 pairings) input.
## 4920 paired-reads (in 74 unique pairings) successfully merged out of 5400 (in 233 pairings) input.
## 6498 paired-reads (in 22 unique pairings) successfully merged out of 6684 (in 83 pairings) input.
## 8520 paired-reads (in 133 unique pairings) successfully merged out of 9354 (in 421 pairings) input.
## 8735 paired-reads (in 113 unique pairings) successfully merged out of 9660 (in 418 pairings) input.
## 7718 paired-reads (in 28 unique pairings) successfully merged out of 8070 (in 106 pairings) input.
## 10832 paired-reads (in 140 unique pairings) successfully merged out of 11541 (in 459 pairings) input.
## 13762 paired-reads (in 48 unique pairings) successfully merged out of 14076 (in 118 pairings) input.
## 12790 paired-reads (in 50 unique pairings) successfully merged out of 13084 (in 117 pairings) input.
## 11008 paired-reads (in 80 unique pairings) successfully merged out of 11920 (in 347 pairings) input.
## 4349 paired-reads (in 66 unique pairings) successfully merged out of 5087 (in 269 pairings) input.
## 5331 paired-reads (in 98 unique pairings) successfully merged out of 6127 (in 389 pairings) input.
## 9182 paired-reads (in 93 unique pairings) successfully merged out of 9978 (in 329 pairings) input.
## 6877 paired-reads (in 27 unique pairings) successfully merged out of 7070 (in 77 pairings) input.
## 12339 paired-reads (in 31 unique pairings) successfully merged out of 12602 (in 89 pairings) input.
## 12641 paired-reads (in 28 unique pairings) successfully merged out of 12812 (in 61 pairings) input.
## 6085 paired-reads (in 78 unique pairings) successfully merged out of 6797 (in 295 pairings) input.
## 4512 paired-reads (in 67 unique pairings) successfully merged out of 5335 (in 239 pairings) input.
## 5255 paired-reads (in 24 unique pairings) successfully merged out of 5499 (in 73 pairings) input.
## 5896 paired-reads (in 67 unique pairings) successfully merged out of 6572 (in 207 pairings) input.
## 8222 paired-reads (in 51 unique pairings) successfully merged out of 8621 (in 140 pairings) input.
## 10061 paired-reads (in 44 unique pairings) successfully merged out of 10304 (in 116 pairings) input.
## 12684 paired-reads (in 80 unique pairings) successfully merged out of 13159 (in 246 pairings) input.
## 19108 paired-reads (in 214 unique pairings) successfully merged out of 20206 (in 651 pairings) input.
## 17896 paired-reads (in 76 unique pairings) successfully merged out of 18520 (in 238 pairings) input.
## 149 paired-reads (in 1 unique pairings) successfully merged out of 149 (in 1 pairings) input.
## 4144 paired-reads (in 31 unique pairings) successfully merged out of 4430 (in 130 pairings) input.
## 8545 paired-reads (in 200 unique pairings) successfully merged out of 9699 (in 639 pairings) input.
## 7779 paired-reads (in 5 unique pairings) successfully merged out of 7796 (in 9 pairings) input.
## 8599 paired-reads (in 56 unique pairings) successfully merged out of 8998 (in 119 pairings) input.
## 3 paired-reads (in 1 unique pairings) successfully merged out of 3 (in 1 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 27 paired-reads (in 5 unique pairings) successfully merged out of 27 (in 5 pairings) input.
## 2144 paired-reads (in 23 unique pairings) successfully merged out of 2160 (in 37 pairings) input.
## 2821 paired-reads (in 25 unique pairings) successfully merged out of 2866 (in 58 pairings) input.
## 614 paired-reads (in 10 unique pairings) successfully merged out of 617 (in 13 pairings) input.
## 799 paired-reads (in 12 unique pairings) successfully merged out of 818 (in 19 pairings) input.
## 6184 paired-reads (in 19 unique pairings) successfully merged out of 6273 (in 57 pairings) input.
## 1 paired-reads (in 1 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 1 paired-reads (in 1 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 175 paired-reads (in 1 unique pairings) successfully merged out of 175 (in 1 pairings) input.
## 6860 paired-reads (in 127 unique pairings) successfully merged out of 8177 (in 352 pairings) input.
## 6783 paired-reads (in 80 unique pairings) successfully merged out of 7303 (in 217 pairings) input.
## 766 paired-reads (in 20 unique pairings) successfully merged out of 910 (in 52 pairings) input.
## 16093 paired-reads (in 121 unique pairings) successfully merged out of 16669 (in 234 pairings) input.
## 940 paired-reads (in 21 unique pairings) successfully merged out of 1052 (in 36 pairings) input.
## 978 paired-reads (in 1 unique pairings) successfully merged out of 979 (in 2 pairings) input.
## 1337 paired-reads (in 1 unique pairings) successfully merged out of 1337 (in 1 pairings) input.
## 9979 paired-reads (in 5 unique pairings) successfully merged out of 9995 (in 8 pairings) input.
## 18351 paired-reads (in 27 unique pairings) successfully merged out of 18544 (in 90 pairings) input.
## 11311 paired-reads (in 66 unique pairings) successfully merged out of 11659 (in 163 pairings) input.
## 19757 paired-reads (in 54 unique pairings) successfully merged out of 19992 (in 111 pairings) input.
## 12565 paired-reads (in 131 unique pairings) successfully merged out of 13367 (in 360 pairings) input.
## 10561 paired-reads (in 42 unique pairings) successfully merged out of 10810 (in 90 pairings) input.
## 9992 paired-reads (in 44 unique pairings) successfully merged out of 10198 (in 113 pairings) input.
## 1696 paired-reads (in 17 unique pairings) successfully merged out of 1728 (in 36 pairings) input.
## 9599 paired-reads (in 91 unique pairings) successfully merged out of 9956 (in 187 pairings) input.
## 20079 paired-reads (in 219 unique pairings) successfully merged out of 21089 (in 633 pairings) input.
## 10400 paired-reads (in 45 unique pairings) successfully merged out of 10629 (in 122 pairings) input.
## 9792 paired-reads (in 79 unique pairings) successfully merged out of 10283 (in 236 pairings) input.
## 13361 paired-reads (in 105 unique pairings) successfully merged out of 13731 (in 256 pairings) input.
## 2838 paired-reads (in 42 unique pairings) successfully merged out of 3021 (in 107 pairings) input.
## 14857 paired-reads (in 104 unique pairings) successfully merged out of 15245 (in 212 pairings) input.
## 26398 paired-reads (in 220 unique pairings) successfully merged out of 27068 (in 422 pairings) input.
## 18135 paired-reads (in 208 unique pairings) successfully merged out of 18919 (in 545 pairings) input.
## 7228 paired-reads (in 65 unique pairings) successfully merged out of 8034 (in 344 pairings) input.
## 6811 paired-reads (in 49 unique pairings) successfully merged out of 7204 (in 171 pairings) input.
## 11137 paired-reads (in 109 unique pairings) successfully merged out of 12077 (in 367 pairings) input.
## 9854 paired-reads (in 28 unique pairings) successfully merged out of 9937 (in 56 pairings) input.
## 16743 paired-reads (in 211 unique pairings) successfully merged out of 17808 (in 565 pairings) input.
## 9242 paired-reads (in 64 unique pairings) successfully merged out of 9517 (in 162 pairings) input.
## 14436 paired-reads (in 15 unique pairings) successfully merged out of 14509 (in 33 pairings) input.
## 22697 paired-reads (in 168 unique pairings) successfully merged out of 23444 (in 431 pairings) input.
## 11685 paired-reads (in 168 unique pairings) successfully merged out of 12163 (in 372 pairings) input.
## 2637 paired-reads (in 16 unique pairings) successfully merged out of 2836 (in 48 pairings) input.
## 7470 paired-reads (in 58 unique pairings) successfully merged out of 8016 (in 228 pairings) input.
## 10857 paired-reads (in 11 unique pairings) successfully merged out of 10971 (in 30 pairings) input.
## 14527 paired-reads (in 107 unique pairings) successfully merged out of 15266 (in 328 pairings) input.
## 9450 paired-reads (in 21 unique pairings) successfully merged out of 9641 (in 58 pairings) input.
## 10693 paired-reads (in 135 unique pairings) successfully merged out of 13792 (in 355 pairings) input.
## 11180 paired-reads (in 141 unique pairings) successfully merged out of 11902 (in 364 pairings) input.
## 4622 paired-reads (in 114 unique pairings) successfully merged out of 5210 (in 307 pairings) input.
## 22213 paired-reads (in 370 unique pairings) successfully merged out of 23584 (in 783 pairings) input.
## 13987 paired-reads (in 126 unique pairings) successfully merged out of 14667 (in 300 pairings) input.
## 8393 paired-reads (in 62 unique pairings) successfully merged out of 8896 (in 244 pairings) input.
## 25267 paired-reads (in 330 unique pairings) successfully merged out of 26174 (in 726 pairings) input.
## 14312 paired-reads (in 53 unique pairings) successfully merged out of 14531 (in 123 pairings) input.
## 1373 paired-reads (in 15 unique pairings) successfully merged out of 1429 (in 38 pairings) input.
## 760 paired-reads (in 5 unique pairings) successfully merged out of 860 (in 16 pairings) input.
## 649 paired-reads (in 19 unique pairings) successfully merged out of 895 (in 93 pairings) input.
## 2387 paired-reads (in 20 unique pairings) successfully merged out of 2435 (in 32 pairings) input.
## 455 paired-reads (in 2 unique pairings) successfully merged out of 455 (in 2 pairings) input.
## 8326 paired-reads (in 47 unique pairings) successfully merged out of 8844 (in 217 pairings) input.
## 41 paired-reads (in 2 unique pairings) successfully merged out of 41 (in 2 pairings) input.
## 1 paired-reads (in 1 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 3 paired-reads (in 1 unique pairings) successfully merged out of 3 (in 1 pairings) input.
## 17203 paired-reads (in 170 unique pairings) successfully merged out of 18183 (in 400 pairings) input.
## 12666 paired-reads (in 88 unique pairings) successfully merged out of 13264 (in 251 pairings) input.
## 17553 paired-reads (in 139 unique pairings) successfully merged out of 18223 (in 386 pairings) input.
#this paper used min overlap of 10bp with "nrITS2": https://www.sciencedirect.com/science/article/pii/S0048969721055455#s0010
#I could play with minOverlap parameter to see the effects on merging, but leaving at 11 for now
# length=30L; overlap=25; mismat=0
# mergers.test <- mergePairs(head(dadaFs, n=length), head(derepFs, n=length), head(dadaRs, n=length), head(derepRs, n=length), verbose=TRUE, minOverlap = overlap, maxMismatch = mismat)
# rm(length,overlap,mismat,mergers.test)
The mergePairs(…) function returns a data.frame corresponding to each successfully merged unique sequence. The “forward” and “reverse” columns record which forward and reverse sequence contributed to that merged sequence.
We can now construct an amplicon sequence variant (ASV) table, a higher-resolution version of the OTU table produced by traditional methods.
# Build the sample-by-sequence count table from the merged read pairs
seqtab <- makeSequenceTable(mergers)
# Dimensions: samples (rows) x unique merged sequences, i.e. ASVs (columns)
dim(seqtab)
## [1] 246 15213
# 246 samples
# 15,213 ASVs
A chimera is a single DNA sequence that originates when multiple transcripts or DNA sequences are joined together. Chimeras are considered artifacts and can be filtered out of the data during processing.
The number of unique variants that are chimeras is higher in exact amplicon sequence variant (ASV) methods like DADA2 than they were in OTU methods, as chimeras very close to the real sequences are the most common type of chimera, and those used to be hidden by being lumped into an OTU. So some expectations based on previous OTU processing should be modified a little bit.
Robert Edgar discusses this in more detail in his uchime2 paper: https://doi.org/10.1101/074252
# Identify chimeric (bimera) sequences in the sequence table and drop them
seqtab.nochim <- removeBimeraDenovo(
  seqtab,
  method = "consensus",
  multithread = TRUE,
  verbose = TRUE
) #more stringent parameter minFoldParentOverAbundance=2
## Identified 14259 bimeras out of 15213 input sequences.
# From a different run (counts differ from the output above): Identified 14249 bimeras out of 15201 input sequences.
# Number of sample names; this has to equal the number of rows of the sequence table
length(sample.names)
## [1] 246
# Label the rows of the chimera-free table with the sample names (assigned by position)
rownames(seqtab.nochim) <- sample.names
# Total merged reads before chimera removal
sum(seqtab)
## [1] 2332765
# Total reads kept after chimera removal
sum(seqtab.nochim)
## [1] 1928495
# Proportion of reads retained
sum(seqtab.nochim) / sum(seqtab)
## [1] 0.8266992
# Percent of reads removed as chimeras (about 17 percent in this run)
100 - ((sum(seqtab.nochim) / sum(seqtab)) * 100)
## [1] 17.33008
The more important metric here is the fraction of reads removed as bimeras, which is <20% here, so in the range of what we see. It is normal that a much higher fraction of ASVs than reads will be removed as bimeras, because chimeras are highly diverse but usually quite rare. You will see more chimeric ASVs if you sequence deeply, but not a meaningfully higher number of chimeric reads.
If you’re seeing more than 20% of reads being chimeric, you may want to re-examine your PCR protocol in the future. Longer extension times and fewer PCR cycles are both approaches that have been shown to reduce the formation of chimeric amplicons.
Looking at the distribution of sequence lengths in the non-chimeric ASVs
# Tabulate how many non-chimeric ASVs occur at each sequence length (number of characters)
table(nchar(getSequences(seqtab.nochim)))
##
## 57 60 63 65 67 68 74 79 81 82 89 90 95 96 100 104 106 110 111 112
## 3 1 1 3 1 2 1 1 1 1 1 1 1 3 2 2 1 1 1 1
## 113 115 116 117 118 121 122 123 124 126 129 132 133 135 145 146 147 148 150 155
## 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1
## 156 160 164 166 168 169 171 172 173 175 177 178 182 185 189 190 192 196 199 204
## 1 1 1 4 1 2 1 1 1 1 1 1 1 1 1 2 2 1 2 1
## 206 208 210 212 216 221 222 227 228 233 239 240 244 254 257 267 280 302 318 324
## 1 1 1 1 1 2 2 1 2 4 1 1 1 1 1 1 1 1 1 1
## 345 350 370 371 373 388 389 396 401 411 416 420 422 425 426 427 428 434 435 437
## 3 1 2 2 1 2 2 1 1 1 2 1 2 2 6 2 3 1 2 1
## 439 441 443 444 445 446 447 448 449 450 451 453 454 455 456 466 469 470 477 490
## 1 1 1 1 13 630 8 10 44 4 5 1 1 1 3 68 14 1 1 1
## 493 494 496
## 1 1 1
# Summing the per-length counts gives the total number of non-chimeric ASVs
sum(table(nchar(getSequences(seqtab.nochim)))) #total ASVs
## [1] 954
# Plot the number of ASVs at each sequence length
plot(table(nchar(getSequences(seqtab.nochim))))
The filter below keeps only ASVs that have more than 100 reads in at least one sample. This threshold has been used before for characterizing pollinator microbiomes (Hammer et al. 2020, 2023). Since my pollen samples are expected to be much simpler than a microbiome, I feel this threshold is quite conservative.
# Abundance filter: keep an ASV (column) only if at least one sample has more
# than 100 reads of it. `seqtab.nochim > 100` is a logical matrix, so colSums()
# counts, per ASV, the samples that exceed the threshold.
# drop = FALSE keeps the result a matrix even when exactly one ASV passes;
# without it the subset collapses to a plain vector.
# NOTE(review): if the intended rule is ">100 reads summed over all samples",
# the condition would be colSums(seqtab.nochim) > 100 -- confirm against the
# threshold used in the cited studies.
seqtab.nochim <- seqtab.nochim[, colSums(seqtab.nochim > 100) > 0, drop = FALSE]
Citations for this step: Hammer, T. J., J. C. Dickerson, W. O. McMillan, and N. Fierer. 2020. Heliconius Butterflies Host Characteristic and Phylogenetically Structured Adult-Stage Microbiomes. Applied and Environmental Microbiology 86. Hammer, T. J., J. Kueneman, M. Argueta-Guzmán, Q. S. McFrederick, Lady Grant, W. Wcislo, S. Buchmann, and B. N. Danforth. 2023. Bee breweries: The unusually fermentative, lactobacilli-dominated brood cell microbiomes of cellophane bees. Frontiers in Microbiology 14:1–16.
The steps & info below are largely from this tutorial: https://benjjneb.github.io/decontam/vignettes/decontam_intro.html#necessary-ingredients
The investigation of environmental microbial communities and microbiomes has been transformed by the recent widespread adoption of culture-free high-throughput sequencing methods. In amplicon sequencing a particular genetic locus is amplified from DNA extracted from the community of interest, and then sequenced on a next-generation sequencing platform. In shotgun metagenomics, bulk DNA is extracted from the community of interest and sequenced. Both techniques provide cost-effective and culture-free characterizations of microbial communities.
However, the accuracy of these methods is limited in practice by the introduction of contaminating DNA that was not truly present in the sampled community. This contaminating DNA can come from several sources, such as the reagents used in the sequencing reaction, and can critically interfere with downstream analyses, especially in lower biomass environments. The decontam package provides simple statistical methods to identify and visualize contaminating DNA features, allowing them to be removed and a more accurate picture of sampled communities to be constructed from marker-gene and metagenomics data.
###Prep phyloseq objects
#load packages
library(decontam)
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse() masks Biostrings::collapse(), IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ purrr::compact() masks XVector::compact()
## ✖ purrr::compose() masks ShortRead::compose()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks GenomicAlignments::first(), S4Vectors::first()
## ✖ dplyr::id() masks ShortRead::id()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks GenomicAlignments::last()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks GenomicAlignments::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice() masks XVector::slice(), IRanges::slice()
## ✖ tibble::view() masks ShortRead::view()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#load sample data
# Sample sheet with one row per sample (sample IDs, control flag, DNA concentration, note)
# NOTE(review): the absolute path points at the HPC scratch space; update it when running elsewhere
samp.ctrls.conc<-read_excel("/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/SampleConc.xlsx")
# Preview the first rows to confirm the columns were read as expected
head(samp.ctrls.conc)
## # A tibble: 6 × 5
## SampleID SampleID_AllUnderscores Control Conc_ng.uL Note
## <chr> <chr> <lgl> <dbl> <chr>
## 1 ITS2_2020-6-16_H1 ITS2_2020_6_16_H1 FALSE 60.1 <NA>
## 2 ITS2_2020-6-16_H5 ITS2_2020_6_16_H5 FALSE 53.7 <NA>
## 3 ITS2_2020-6-16_H6 ITS2_2020_6_16_H6 FALSE 41.8 <NA>
## 4 ITS2_2020-6-17_H2 ITS2_2020_6_17_H2 FALSE 45.7 <NA>
## 5 ITS2_2020-6-17_H4 ITS2_2020_6_17_H4 FALSE 75.7 <NA>
## 6 ITS2_2020-6-17_H8 ITS2_2020_6_17_H8 FALSE 59.6 <NA>
# Restrict the sample sheet to rbcL samples (SampleID begins with "rbcL")
samp.ctrls.conc <- dplyr::filter(
  samp.ctrls.conc,
  stringr::str_starts(SampleID, "rbcL")
)
# NOTE(review): this detaches only the "package:tidyverse" entry itself; the
# packages it attached (dplyr, stringr, ...) presumably stay on the search
# path and keep masking other functions -- confirm with search()
detach("package:tidyverse")
#create phyloseq objects with seqtab and sample data (i.e., samp.ctrls.conc)
# Sample metadata, named by the all-underscore version of the sample IDs
SAMP <- sample_data(samp.ctrls.conc)
sample_names(SAMP) <- sample_data(SAMP)$SampleID_AllUnderscores
# ASV count table: samples are rows here, so taxa_are_rows is FALSE
# (written out in full because `F` is an ordinary variable that can be reassigned)
OTU <- otu_table(seqtab.nochim, taxa_are_rows = FALSE, errorIfNULL = TRUE)
# Prefix the OTU sample names with the marker name so their format matches SAMP
sample_names(OTU) <- paste0("rbcL_", sample_names(OTU))
#checking if name formats in SAMP and OTU objects match
# first six sample names from the metadata object
head(sample_names(SAMP))
## [1] "rbcL_2020_6_16_H1" "rbcL_2020_6_16_H5" "rbcL_2020_6_16_H6"
## [4] "rbcL_2020_6_17_H2" "rbcL_2020_6_17_H4" "rbcL_2020_6_17_H8"
# first six sample names from the count table, to compare by eye with those of SAMP
head(sample_names(OTU))
## [1] "rbcL_2020_6_16_H1" "rbcL_2020_6_16_H5" "rbcL_2020_6_16_H6"
## [4] "rbcL_2020_6_17_H2" "rbcL_2020_6_17_H4" "rbcL_2020_6_17_H8"
#checking if number of samples in SAMP and OTU objects match
identical(sample_names(SAMP),sample_names(OTU)) # The safe and reliable way to test two objects for being exactly equal. It returns TRUE in this case, FALSE in every other case.
## [1] FALSE
# For each SAMP sample name, give the position of its first match among the OTU sample names (NA when there is no match)
match(x = sample_names(SAMP), table = sample_names(OTU))
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 NA 25 26 NA NA 27 28 29 30 31 32 33
## [37] 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
## [55] 52 53 54 55 56 57 58 59 60 61 62 63 64 65 NA 66 NA 67
## [73] NA 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
## [91] 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
## [109] 103 NA 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
## [127] 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
## [145] 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
## [163] 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
## [181] 174 175 NA NA 176 177 NA 178 179 180 181 182 183 184 185 186 187 188
## [199] NA 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
## [217] 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
## [235] 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
## [253] 242 NA NA 243 NA NA 244 245 246
sample_names(SAMP) %in% sample_names(OTU) # %in% returns a logical vector with one entry per SAMP sample name: TRUE when that name also occurs among the OTU sample names, FALSE when it does not
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] FALSE TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE
## [73] FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE FALSE FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [193] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [205] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [253] TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE TRUE
# Drop the metadata rows of samples that are absent from the OTU table.
# prune_samples() takes a logical vector (TRUE = keep, one entry per sample in
# the object) followed by the phyloseq object to subset.
SAMP <- prune_samples(
  sample_names(SAMP) %in% sample_names(OTU),
  SAMP
)
# Combine the count table and the sample metadata into one phyloseq object
physeq <- phyloseq(OTU, SAMP)
physeq
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 259 taxa and 246 samples ]
## sample_data() Sample Data: [ 246 samples by 5 sample variables ]
# List the slots of the object; "tax_table", "phy_tree" and "refseq" are empty
slotNames(physeq)
## [1] "otu_table" "tax_table" "sam_data" "phy_tree" "refseq"
# Work on a copy whose OTU table rows are ordered by the Control flag,
# so that the controls appear last
physeq.reord <- physeq
otu_table(physeq.reord) <- otu_table(physeq.reord)[
  order(sample_data(physeq.reord)$Control),
]
A quick first look at the library sizes (i.e. the number of reads) in each sample, as a function of whether that sample was a true positive sample or a negative control:
# Plot the library size (total reads) of every sample, controls vs. true samples
df <- as.data.frame(sample_data(physeq)) # sample metadata as a ggplot-friendly data.frame
df$LibrarySize <- sample_sums(physeq) # total number of reads per sample
df <- df[order(df$LibrarySize), ] # sort samples from smallest to largest library
df$Index <- seq_len(nrow(df)) # rank in the sorted order; seq_len() stays empty for zero rows, unlike seq()
# Library size by rank, colored by control vs. unknown sample
ggplot(data = df, aes(x = Index, y = LibrarySize, color = as.factor(Control))) + geom_point()
# Library size by rank, colored by library stock concentration
ggplot(data = df, aes(x = Index, y = LibrarySize, color = Conc_ng.uL)) + geom_point()
The first contaminant identification method we’ll use is the “frequency” method. In this method, the distribution of the frequency of each sequence feature as a function of the input DNA concentration is used to identify contaminants.
The second contaminant identification method is the “prevalence” method. In this method, the prevalence (presence/absence across samples) of each sequence feature in true positive samples is compared to the prevalence in negative controls to identify contaminants.
The final, “combined” method: The frequency and prevalence probabilities are combined with Fisher’s method and used to identify contaminants.
# Identify contaminants with the "combined" method (frequency and prevalence).
# Control is read from the sample sheet as a logical column, so convert it
# explicitly with as.logical() instead of comparing it against the string "TRUE".
sample_data(physeq.reord)$is.neg <- as.logical(sample_data(physeq.reord)$Control)
# conc names the DNA-concentration column; neg names the negative-control flag
contamdf.comb <- isContaminant(physeq.reord, method = "combined", conc = "Conc_ng.uL", neg = "is.neg")
## Warning in .is_contaminant(seqtab, conc = conc, neg = neg, method = method, :
## Removed 14 samples with zero total counts (or frequency).
## Warning in .is_contaminant(seqtab, conc = conc, neg = neg, method = method, :
## Removed 14 samples with zero total counts (or frequency).
# Tally how many sequence features were (TRUE) or were not (FALSE) flagged as contaminants
table(contamdf.comb[["contaminant"]])
##
## FALSE TRUE
## 251 8
# Row positions of the features flagged as contaminants
which(contamdf.comb[["contaminant"]])
## [1] 2 9 13 18 25 26 43 50
# Convert counts to presence/absence (1 when a feature was observed in a sample, 0 otherwise)
ps.pa <- transform_sample_counts(physeq.reord, function(abund) 1 * (abund > 0))
# Split into negative controls and true samples using the logical Control flag
# (converted explicitly instead of being compared with the strings "TRUE"/"FALSE")
ps.pa.neg <- prune_samples(as.logical(sample_data(ps.pa)$Control), ps.pa)
ps.pa.pos <- prune_samples(!as.logical(sample_data(ps.pa)$Control), ps.pa)
# Per-feature prevalence in true samples and in negative controls, plus the contaminant call
df.pa <- data.frame(
  pa.pos = taxa_sums(ps.pa.pos),
  pa.neg = taxa_sums(ps.pa.neg),
  contaminant = contamdf.comb$contaminant
)
# Plot the number of times each feature was observed in negative controls vs. true samples
ggplot(data = df.pa, aes(x = pa.neg, y = pa.pos, color = contaminant)) +
  geom_point() +
  xlab("Prevalence (Negative Controls)") +
  ylab("Prevalence (True Samples)")
#Taxa seem to split pretty cleanly into a branch that shows up mostly in positive samples, and another that shows up mostly in negative controls, and the contaminant assignment (at default probability threshold) has done a good job of identifying those mostly in negative controls.
# Remove the flagged contaminants and build the seqtab.nochim.nocontam matrix
physeq.reord.noncontam <- prune_taxa(!contamdf.comb$contaminant, physeq.reord) # keep only the features not flagged as contaminants
# Coerce the pruned OTU table to a plain matrix (easier to manipulate and export).
# as() performs the S4 coercion directly, so it avoids the warning that
# `class(x) <- "matrix"` raises when applied to an S4 object.
seqtab.nochim.nocontam <- as(otu_table(physeq.reord.noncontam), "matrix")
# Show each sample name without its leading "rbcL_": keep characters 6 through 100 (position 100 is past the end of every name printed below)
substr(x = rownames(seqtab.nochim.nocontam), start = 6, stop = 100)
## [1] "2020_6_16_H1"
## [2] "2020_6_16_H5"
## [3] "2020_6_16_H6"
## [4] "2020_6_17_H2"
## [5] "2020_6_17_H4"
## [6] "2020_6_17_H8"
## [7] "2020_6_18_H3"
## [8] "2020_6_18_H7"
## [9] "2020_6_18_H9"
## [10] "2020_6_3_H1"
## [11] "2020_6_3_H5"
## [12] "2020_6_3_H6"
## [13] "2020_6_30_H1"
## [14] "2020_6_30_H5"
## [15] "2020_6_30_H6"
## [16] "2020_6_4_H2"
## [17] "2020_6_4_H4"
## [18] "2020_6_4_H8"
## [19] "2020_6_5_H3"
## [20] "2020_6_5_H7"
## [21] "2020_6_5_H9"
## [22] "2020_7_1_H2"
## [23] "2020_7_1_H4"
## [24] "2020_7_1_H8"
## [25] "2020_7_14_H5"
## [26] "2020_7_14_H6"
## [27] "2020_7_15_H8"
## [28] "2020_7_16_H3"
## [29] "2020_7_16_H7"
## [30] "2020_7_16_H9"
## [31] "2020_7_2_H3"
## [32] "2020_7_2_H7"
## [33] "2020_7_2_H9"
## [34] "2021_6_13_H1"
## [35] "2021_6_13_H3"
## [36] "2021_6_14_H11"
## [37] "2021_6_14_H6"
## [38] "2021_6_14_H7"
## [39] "2021_6_15_H8"
## [40] "2021_6_21_H10"
## [41] "2021_6_21_H12"
## [42] "2021_6_21_H9"
## [43] "2021_6_27_H21"
## [44] "2021_6_27_H22"
## [45] "2021_6_27_H27"
## [46] "2021_6_28_H25"
## [47] "2021_6_28_H26"
## [48] "2021_6_28_H28"
## [49] "2021_6_29_H17"
## [50] "2021_6_29_H23"
## [51] "2021_6_29_H24"
## [52] "2021_6_4_H21"
## [53] "2021_6_4_H22"
## [54] "2021_6_4_H27"
## [55] "2021_6_5_H18"
## [56] "2021_6_5_H25"
## [57] "2021_6_5_H26"
## [58] "2021_6_6_H17"
## [59] "2021_6_6_H24"
## [60] "2021_6_7_H23"
## [61] "2021_7_14_H10"
## [62] "2021_7_14_H12"
## [63] "2021_7_20_H27"
## [64] "2021_7_21_H25"
## [65] "2021_7_21_H26"
## [66] "2021_7_6_H11"
## [67] "2021_7_6_H6"
## [68] "2021_7_7_H8"
## [69] "2021_7_8_H3"
## [70] "2023_6_12_H3"
## [71] "2023_6_12_H5"
## [72] "2023_6_12_H7"
## [73] "2023_6_13_H6"
## [74] "2023_6_13_H8"
## [75] "2023_6_13_H9"
## [76] "2023_6_14_H3"
## [77] "2023_6_14_H7"
## [78] "2023_6_14_H9"
## [79] "2023_6_16_H5"
## [80] "2023_6_24_H6"
## [81] "2023_6_24_H8"
## [82] "2023_6_25_H2"
## [83] "2023_6_25_H4"
## [84] "2023_6_26_H1"
## [85] "2023_6_26_H7"
## [86] "2023_6_27_H3"
## [87] "2023_6_27_H5"
## [88] "2023_6_8_H1"
## [89] "2023_6_8_H2"
## [90] "2023_6_8_H4"
## [91] "2023_6_9_H2"
## [92] "2023_6_9_H4"
## [93] "2023_7_15_H6"
## [94] "2023_7_16_H4"
## [95] "2023_7_17_H1"
## [96] "2023_7_18_H3"
## [97] "2023_7_18_H7"
## [98] "2023_7_29_H5"
## [99] "2023_7_29_H7"
## [100] "2023_7_30_H8"
## [101] "2023_7_30_H9"
## [102] "2023_7_5_H1"
## [103] "2023_7_5_H2"
## [104] "2023_7_6_H6"
## [105] "2023_7_6_H8"
## [106] "2023_7_6_H9"
## [107] "2023_7_8_H3"
## [108] "2023_7_8_H5"
## [109] "2023_7_8_H7"
## [110] "2023_8_4_H2"
## [111] "2023_8_4_H5"
## [112] "2023_8_4_H6"
## [113] "2023_8_4_H7"
## [114] "2023_8_4_H8"
## [115] "2023_8_4_H9"
## [116] "Ba001"
## [117] "Ba002"
## [118] "Ba003"
## [119] "Bb001"
## [120] "Bb002"
## [121] "Bb003"
## [122] "Bb004"
## [123] "Bb005"
## [124] "Bb007"
## [125] "Bb008"
## [126] "Bb009"
## [127] "Bb010"
## [128] "Bb011"
## [129] "Bb012"
## [130] "Bb013"
## [131] "Bb014"
## [132] "Bb015"
## [133] "Bb016"
## [134] "Bb017"
## [135] "Bb018"
## [136] "Bb019"
## [137] "Bb020"
## [138] "Bb021"
## [139] "Bb022"
## [140] "Bb023"
## [141] "Bb024"
## [142] "Bb025"
## [143] "Bf001"
## [144] "Bf002"
## [145] "Bf003"
## [146] "Bf004"
## [147] "Bg001"
## [148] "Bg002"
## [149] "Bg003"
## [150] "Bg004"
## [151] "Bg005"
## [152] "Bg006"
## [153] "Bg007"
## [154] "Bg008"
## [155] "Bg009"
## [156] "Bg010"
## [157] "Bg011"
## [158] "Bg012"
## [159] "Bg013"
## [160] "Bg014"
## [161] "Bg015"
## [162] "Bg016"
## [163] "Bg017"
## [164] "Bg018"
## [165] "Bg019"
## [166] "Bi001"
## [167] "Bi002"
## [168] "Bi003"
## [169] "Bi004"
## [170] "Bi005"
## [171] "Bi006"
## [172] "Bi007"
## [173] "CKC0001"
## [174] "ESE0004"
## [175] "KLS0007"
## [176] "KLS0027"
## [177] "KLS0044"
## [178] "KLS0045"
## [179] "KLS0052"
## [180] "KLS0054"
## [181] "KLS0055"
## [182] "KLS0071"
## [183] "KLS0095"
## [184] "KLS0096"
## [185] "KLS0105"
## [186] "KLS0106"
## [187] "KLS0119"
## [188] "KLS0134"
## [189] "KLS0135"
## [190] "KLS0136"
## [191] "KLS0137"
## [192] "KLS0138"
## [193] "KLS0139"
## [194] "KLS0150"
## [195] "KLS0153"
## [196] "KLS0155"
## [197] "KLS0156"
## [198] "KLS0159"
## [199] "KLS0163"
## [200] "KLS0165"
## [201] "KLS0167"
## [202] "KLS0168"
## [203] "KLS0169"
## [204] "KLS0170"
## [205] "KLS0200"
## [206] "KLS0201"
## [207] "KLS0205"
## [208] "KLS0209"
## [209] "KLS0221"
## [210] "KLS0224"
## [211] "KLS0225"
## [212] "KLS0227"
## [213] "KLS0241"
## [214] "KLS0244"
## [215] "KLS0246"
## [216] "KLS0248"
## [217] "KLS0253"
## [218] "KLS0254"
## [219] "KLS0256"
## [220] "KLS0259"
## [221] "KLS0263"
## [222] "KLS0266"
## [223] "KLS0272"
## [224] "SCA0009"
## [225] "SCA0010"
## [226] "SCA0013"
## [227] "ext_neg_ctrl_20230909"
## [228] "ext_neg_ctrl_20231007"
## [229] "ext_neg_ctrl_20231008"
## [230] "ext_neg_ctrl_2024220A"
## [231] "ext_neg_ctrl_2024220B"
## [232] "ext_neg_ctrl_2024221A"
## [233] "ext_neg_ctrl_2024221B"
## [234] "ext_neg_ctrl_2024222A"
## [235] "ext_neg_ctrl_2024222B"
## [236] "ext_neg_ctrl_2024312A"
## [237] "ext_neg_ctrl_2024312B"
## [238] "ext_neg_ctrl_2024314A"
## [239] "ext_neg_ctrl_2024314B"
## [240] "ext_neg_ctrl_2024319"
## [241] "pcr_rbcL_neg_crtl_20240417"
## [242] "pcr_rbcL_neg_ctrl_20240409"
## [243] "pcr_rbcL_neg_ctrl_20240418A"
## [244] "pcr_rbcL_neg_ctrl_20240418B"
## [245] "pcr_rbcL_neg_ctrl_20240523"
## [246] "rbcL_pcr_neg_ctrl_20231021_20231119"
# Compare the de-prefixed nocontam sample names with the nochim sample names. FALSE is expected: the OTU table was reordered earlier so that the controls come last, so the two row orders differ
identical(x = substr(rownames(seqtab.nochim.nocontam), 6, 100), y = rownames(seqtab.nochim))
## [1] FALSE
# For each de-prefixed nocontam sample name, give the position of its first match among the nochim sample names
match(x = substr(rownames(seqtab.nochim.nocontam), 6, 100), table = rownames(seqtab.nochim))
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 189 190 191 192 193 194
## [181] 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
## [199] 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
## [217] 231 232 233 234 235 236 237 244 245 246 175 176 177 178 179 180 181 182
## [235] 183 184 185 186 187 188 238 239 240 241 242 243
# Row order of seqtab.nochim, with "rbcL_" pasted in front so the names match those in nocontam
index <- paste0("rbcL_", rownames(seqtab.nochim))
# Reorder nocontam to that order, reusing the index instead of rebuilding it; drop = FALSE keeps the result a matrix
seqtab.nochim.nocontam <- seqtab.nochim.nocontam[index, , drop = FALSE]
# Confirm that both tables now list the samples in the same order
identical(substr(rownames(seqtab.nochim.nocontam), 6, 100), rownames(seqtab.nochim))
## [1] TRUE
We now inspect the number of reads that made it through each step in the pipeline to verify everything worked as expected.
# Check how many samples are present at each stage of the pipeline
head(out)
## reads.in reads.out
## 2020_6_16_H1 16617 9241
## 2020_6_16_H5 9459 5929
## 2020_6_16_H6 1296 850
## 2020_6_17_H2 6917 4559
## 2020_6_17_H4 3027 2027
## 2020_6_17_H8 24212 15437
nrow(out) # one row per sample; avoids dividing length(out) by a hard-coded column count
## [1] 258
length(dadaFs)
## [1] 246
length(dadaRs)
## [1] 246
length(mergers)
## [1] 246
nrow(seqtab.nochim) # samples are rows, so nrow() gives the sample count directly
## [1] 246
nrow(seqtab.nochim.nocontam)
## [1] 246
length(sample.names)
## [1] 246
# Total number of reads in one result object: the sum of its unique-sequence abundances
getN <- function(x) sum(getUniques(x))
# Assemble the read-tracking table: one row per sample, one column per pipeline stage.
# vapply() is used instead of sapply() so each per-sample total must be a single number.
track <- cbind(
  out[names(derepFs), , drop = FALSE], # keep only the samples from "out" which appear in "derepFs" (the original tutorial code passes "out" as is); drop = FALSE keeps a matrix even for a single sample
  vapply(dadaFs, getN, numeric(1)), # if processing a single sample, replace with getN(dadaFs)
  vapply(dadaRs, getN, numeric(1)),
  vapply(mergers, getN, numeric(1)),
  rowSums(seqtab.nochim),
  rowSums(seqtab.nochim.nocontam)
)
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim", "nocontam")
rownames(track) <- sample.names
track
## input filtered denoisedF denoisedR merged
## 2020_6_16_H1 16617 9241 9210 9190 8965
## 2020_6_16_H5 9459 5929 5806 5833 4858
## 2020_6_16_H6 1296 850 842 831 761
## 2020_6_17_H2 6917 4559 4480 4452 2982
## 2020_6_17_H4 3027 2027 1988 1991 1830
## 2020_6_17_H8 24212 15437 15362 15357 14791
## 2020_6_18_H3 6564 4021 3931 3977 3247
## 2020_6_18_H7 12593 8636 8594 8594 8339
## 2020_6_18_H9 7250 4778 4769 4736 4320
## 2020_6_3_H1 23986 15258 15182 15112 13957
## 2020_6_3_H5 28537 17366 17225 17210 15540
## 2020_6_3_H6 29641 17405 17308 17342 16216
## 2020_6_30_H1 18481 12116 12070 12040 11003
## 2020_6_30_H5 18531 12313 12271 12244 11620
## 2020_6_30_H6 18275 12755 12710 12692 12018
## 2020_6_4_H2 1402 851 836 832 567
## 2020_6_4_H4 556 350 324 342 284
## 2020_6_4_H8 13252 8162 8027 8033 7061
## 2020_6_5_H3 25128 16183 16101 16054 15185
## 2020_6_5_H7 21045 13260 13165 13190 12306
## 2020_6_5_H9 19654 12153 12098 12087 11510
## 2020_7_1_H2 19567 13295 13205 13151 11719
## 2020_7_1_H4 4 2 1 1 0
## 2020_7_1_H8 19719 13616 13542 13549 12827
## 2020_7_14_H5 18 1 1 1 1
## 2020_7_14_H6 1 1 1 1 0
## 2020_7_15_H8 14331 8785 8768 8745 8601
## 2020_7_16_H3 54296 36464 36325 36264 35105
## 2020_7_16_H7 12751 8836 8797 8765 8555
## 2020_7_16_H9 29 1 1 1 0
## 2020_7_2_H3 10763 7246 7220 7209 6958
## 2020_7_2_H7 14383 9836 9772 9776 8993
## 2020_7_2_H9 20528 14023 13977 13962 13210
## 2021_6_13_H1 21810 14260 14231 14164 13769
## 2021_6_13_H3 17070 11753 11640 11648 10503
## 2021_6_14_H11 18063 12176 12122 12103 11736
## 2021_6_14_H6 17750 11728 11618 11618 10687
## 2021_6_14_H7 15547 10651 10625 10602 10452
## 2021_6_15_H8 18595 12915 12890 12832 12426
## 2021_6_21_H10 23512 16187 16141 16075 15566
## 2021_6_21_H12 25343 17032 16983 16933 16581
## 2021_6_21_H9 21263 13821 13701 13756 13193
## 2021_6_27_H21 23331 15729 15602 15655 15055
## 2021_6_27_H22 27932 19232 19227 19192 19176
## 2021_6_27_H27 18354 12550 12441 12400 11605
## 2021_6_28_H25 14755 10169 10046 10083 9643
## 2021_6_28_H26 9038 6196 6168 6128 5787
## 2021_6_28_H28 14931 10332 10299 10225 9722
## 2021_6_29_H17 14135 9104 9069 9057 8741
## 2021_6_29_H23 17111 11907 11798 11823 11174
## 2021_6_29_H24 1100 715 707 696 664
## 2021_6_4_H21 17980 11218 11155 11132 10227
## 2021_6_4_H22 8144 4717 4687 4646 4340
## 2021_6_4_H27 7028 4547 4528 4502 4188
## 2021_6_5_H18 1897 1152 1146 1130 920
## 2021_6_5_H25 4496 2834 2760 2759 1828
## 2021_6_5_H26 5364 3400 3347 3326 2883
## 2021_6_6_H17 11941 7362 7282 7254 6788
## 2021_6_6_H24 18401 11939 11864 11838 10565
## 2021_6_7_H23 17153 10457 10411 10354 9631
## 2021_7_14_H10 32736 22653 22639 22627 22525
## 2021_7_14_H12 7 4 3 3 0
## 2021_7_20_H27 14261 9282 9227 9219 8856
## 2021_7_21_H25 17648 12630 12616 12582 12409
## 2021_7_21_H26 3 1 1 1 0
## 2021_7_6_H11 21218 13905 13802 13849 13142
## 2021_7_6_H6 20087 13693 13489 13552 12548
## 2021_7_7_H8 23042 15342 15321 15302 15258
## 2021_7_8_H3 16549 11288 11239 11217 10998
## 2023_6_12_H3 10325 6412 6360 6365 6096
## 2023_6_12_H5 20681 14325 14287 14269 13614
## 2023_6_12_H7 13867 8964 8927 8908 8521
## 2023_6_13_H6 20034 12999 12946 12929 12552
## 2023_6_13_H8 12403 8254 8226 8216 7644
## 2023_6_13_H9 5140 3377 3353 3345 3164
## 2023_6_14_H3 21637 15274 15240 15193 14940
## 2023_6_14_H7 22097 14310 14241 14231 13267
## 2023_6_14_H9 18343 12646 12610 12584 12100
## 2023_6_16_H5 17693 11967 11885 11923 11626
## 2023_6_24_H6 13200 8763 8550 8518 7464
## 2023_6_24_H8 14775 9782 9689 9665 8716
## 2023_6_25_H2 20061 13012 12820 12801 11406
## 2023_6_25_H4 8140 5350 5200 5203 4293
## 2023_6_26_H1 8582 5962 5851 5857 5019
## 2023_6_26_H7 15559 10330 10091 10127 8600
## 2023_6_27_H3 9104 6278 6116 6109 5413
## 2023_6_27_H5 17235 11398 11094 11122 9709
## 2023_6_8_H1 14495 9094 8973 8990 8514
## 2023_6_8_H2 14863 10848 10825 10814 10661
## 2023_6_8_H4 19094 11980 11925 11897 10641
## 2023_6_9_H2 34007 23277 23086 23099 21511
## 2023_6_9_H4 10947 7104 6978 6966 5749
## 2023_7_15_H6 26987 18724 18598 18581 17494
## 2023_7_16_H4 18816 13047 12962 12981 12124
## 2023_7_17_H1 31619 21557 21417 21344 20032
## 2023_7_18_H3 10733 7624 7529 7454 6429
## 2023_7_18_H7 12607 8641 8585 8555 8191
## 2023_7_29_H5 12607 8843 8807 8801 8426
## 2023_7_29_H7 27811 19278 19147 19176 18638
## 2023_7_30_H8 12062 8704 8695 8662 8604
## 2023_7_30_H9 6304 4403 4353 4360 4010
## 2023_7_5_H1 9600 6719 6676 6674 6483
## 2023_7_5_H2 5936 4171 4149 4142 3999
## 2023_7_6_H6 16841 11259 11183 11189 10538
## 2023_7_6_H8 29393 19324 19088 19045 18240
## 2023_7_6_H9 31609 22375 22157 22151 21192
## 2023_7_8_H3 10194 7103 7076 7061 6815
## 2023_7_8_H5 17601 12450 12430 12415 12219
## 2023_7_8_H7 33206 22286 22199 22190 21478
## 2023_8_4_H2 22703 15351 14972 14891 14683
## 2023_8_4_H5 19081 13489 13113 13162 11651
## 2023_8_4_H6 17531 11447 11066 10976 10672
## 2023_8_4_H7 31660 21306 20936 20932 17814
## 2023_8_4_H8 17954 11628 11589 11536 11203
## 2023_8_4_H9 524 344 309 291 173
## Ba001 21111 11589 11547 11548 11335
## Ba002 12744 8068 7957 7889 7025
## Ba003 11874 7634 7604 7602 7459
## Bb001 21681 13734 13698 13681 13454
## Bb002 29960 19049 19013 18970 18691
## Bb003 38886 25904 25777 25716 24659
## Bb004 35805 23070 22899 22862 21810
## Bb005 18006 11519 11436 11451 10926
## Bb007 19551 12125 12050 12031 11479
## Bb008 10374 6567 6518 6546 6219
## Bb009 13681 8599 8545 8559 7897
## Bb010 17683 10777 10693 10698 9948
## Bb011 12934 8051 7954 7846 6730
## Bb012 42681 27231 27095 27120 25706
## Bb013 11694 6172 6087 6070 5640
## Bb014 25104 15434 15360 15361 14739
## Bb015 11070 7061 6998 6950 6461
## Bb016 9634 6078 5987 6002 5604
## Bb017 12577 7797 7765 7701 7120
## Bb018 25614 16225 16097 16092 14390
## Bb019 31221 19040 18970 18977 18257
## Bb020 31931 20153 20124 20118 19904
## Bb021 10781 6625 6530 6509 5941
## Bb022 23477 15270 15257 15176 14903
## Bb023 12672 8291 8241 8232 7803
## Bb024 22426 15599 15567 15539 15334
## Bb025 16294 9402 9355 9272 8909
## Bf001 26481 18214 18142 18069 17277
## Bf002 11 4 3 2 2
## Bf003 17101 10825 10810 10791 10408
## Bf004 8560 5494 5444 5443 4920
## Bg001 11540 6774 6739 6712 6498
## Bg002 15770 9593 9448 9485 8520
## Bg003 15898 9878 9781 9748 8735
## Bg004 13830 8208 8174 8095 7718
## Bg005 17850 11754 11652 11639 10832
## Bg006 26159 14258 14174 14148 13762
## Bg007 20463 13163 13130 13113 12790
## Bg008 20146 12164 12036 12033 11008
## Bg009 10615 5238 5170 5141 4349
## Bg010 10918 6285 6201 6202 5331
## Bg011 16326 10184 10083 10063 9182
## Bg012 12514 7157 7132 7089 6877
## Bg013 21541 12665 12641 12624 12339
## Bg014 18821 12895 12866 12835 12641
## Bg015 11683 6987 6880 6899 6085
## Bg016 8777 5489 5435 5383 4512
## Bg017 9272 5605 5550 5539 5255
## Bg018 10971 6740 6707 6602 5896
## Bg019 13995 8707 8675 8648 8222
## Bi001 17228 10410 10360 10338 10061
## Bi002 21579 13359 13263 13248 12684
## Bi003 31512 20553 20368 20383 19108
## Bi004 31006 18623 18578 18561 17896
## Bi005 231 149 149 149 149
## Bi006 7576 4554 4485 4491 4144
## Bi007 15583 9976 9803 9857 8545
## CKC0001 14879 7833 7821 7806 7779
## ESE0004 14474 9116 9071 9032 8599
## ext_neg_ctrl_20230909 7 3 3 3 3
## ext_neg_ctrl_20231007 6 3 1 1 0
## ext_neg_ctrl_20231008 12 1 1 1 0
## ext_neg_ctrl_2024220A 122 37 27 29 27
## ext_neg_ctrl_2024220B 3295 2165 2165 2160 2144
## ext_neg_ctrl_2024221A 4432 2878 2876 2866 2821
## ext_neg_ctrl_2024221B 965 622 618 618 614
## ext_neg_ctrl_2024222A 1298 823 823 818 799
## ext_neg_ctrl_2024222B 9567 6313 6306 6278 6184
## ext_neg_ctrl_2024312A 1 1 1 1 1
## ext_neg_ctrl_2024312B 19 6 4 1 0
## ext_neg_ctrl_2024314A 8 1 1 1 1
## ext_neg_ctrl_2024314B 6 1 1 1 0
## ext_neg_ctrl_2024319 321 180 179 175 175
## KLS0007 14456 8407 8298 8272 6860
## KLS0027 12663 7425 7348 7374 6783
## KLS0044 1683 941 927 922 766
## KLS0045 30890 16853 16780 16733 16093
## KLS0052 1867 1057 1057 1052 940
## KLS0054 1859 991 984 981 978
## KLS0055 2718 1345 1343 1337 1337
## KLS0071 16589 10060 10036 10009 9979
## KLS0095 25992 18665 18599 18598 18351
## KLS0096 18985 11780 11767 11666 11311
## KLS0105 30704 20059 20023 20025 19757
## KLS0106 21639 13590 13511 13428 12565
## KLS0119 18103 10904 10886 10821 10561
## KLS0134 14814 10331 10273 10244 9992
## KLS0135 2499 1762 1743 1744 1696
## KLS0136 14868 10072 10028 9985 9599
## KLS0137 35079 21450 21279 21251 20079
## KLS0138 15471 10763 10691 10689 10400
## KLS0139 15646 10490 10352 10411 9792
## KLS0150 20779 13943 13870 13799 13361
## KLS0153 4518 3156 3096 3071 2838
## KLS0155 24698 15382 15337 15287 14857
## KLS0156 38485 27268 27185 27143 26398
## KLS0159 29323 19126 19041 18995 18135
## KLS0163 12985 8259 8140 8140 7228
## KLS0165 11827 7382 7292 7288 6811
## KLS0167 19510 12356 12174 12254 11137
## KLS0168 14958 9986 9974 9946 9854
## KLS0169 27471 18064 17958 17904 16743
## KLS0170 14696 9654 9601 9563 9242
## KLS0200 22264 14566 14546 14519 14436
## KLS0201 37844 23716 23574 23573 22697
## KLS0205 18576 12367 12237 12283 11685
## KLS0209 4309 2911 2863 2881 2637
## KLS0221 13628 8200 8124 8082 7470
## KLS0224 17970 11029 11014 10979 10857
## KLS0225 23809 15454 15374 15340 14527
## KLS0227 14436 9704 9677 9664 9450
## KLS0241 23414 13922 13877 13831 10693
## KLS0244 18600 12161 12043 12004 11180
## KLS0246 8419 5407 5325 5285 4622
## KLS0248 38123 23791 23723 23639 22213
## KLS0253 23821 14833 14730 14767 13987
## KLS0254 14063 9069 9000 8960 8393
## KLS0256 40058 26403 26269 26300 25267
## KLS0259 20424 14649 14583 14590 14312
## KLS0263 2180 1481 1446 1449 1373
## KLS0266 1355 896 880 870 760
## KLS0272 1546 977 948 920 649
## pcr_rbcL_neg_crtl_20240417 3607 2444 2442 2435 2387
## pcr_rbcL_neg_ctrl_20240409 739 462 456 455 455
## pcr_rbcL_neg_ctrl_20240418A 13703 8900 8877 8865 8326
## pcr_rbcL_neg_ctrl_20240418B 65 43 41 42 41
## pcr_rbcL_neg_ctrl_20240523 8 1 1 1 1
## rbcL_pcr_neg_ctrl_20231021_20231119 4 3 3 3 3
## SCA0009 29024 18374 18307 18239 17203
## SCA0010 25298 13370 13327 13300 12666
## SCA0013 28147 18429 18350 18294 17553
## nonchim nocontam
## 2020_6_16_H1 8279 8177
## 2020_6_16_H5 2607 2483
## 2020_6_16_H6 624 624
## 2020_6_17_H2 1457 1401
## 2020_6_17_H4 1552 565
## 2020_6_17_H8 12591 10665
## 2020_6_18_H3 1815 1378
## 2020_6_18_H7 6681 6286
## 2020_6_18_H9 3374 3374
## 2020_6_3_H1 9942 9942
## 2020_6_3_H5 8789 8789
## 2020_6_3_H6 10650 10650
## 2020_6_30_H1 7926 6732
## 2020_6_30_H5 7195 5406
## 2020_6_30_H6 8380 8361
## 2020_6_4_H2 385 376
## 2020_6_4_H4 128 128
## 2020_6_4_H8 2555 2555
## 2020_6_5_H3 11929 11898
## 2020_6_5_H7 8294 8167
## 2020_6_5_H9 7920 7904
## 2020_7_1_H2 8040 5782
## 2020_7_1_H4 0 0
## 2020_7_1_H8 9310 9310
## 2020_7_14_H5 0 0
## 2020_7_14_H6 0 0
## 2020_7_15_H8 6488 6455
## 2020_7_16_H3 27551 8157
## 2020_7_16_H7 7549 5478
## 2020_7_16_H9 0 0
## 2020_7_2_H3 5700 4849
## 2020_7_2_H7 6642 4713
## 2020_7_2_H9 9961 7286
## 2021_6_13_H1 10348 10348
## 2021_6_13_H3 6680 6602
## 2021_6_14_H11 10904 10901
## 2021_6_14_H6 7844 7350
## 2021_6_14_H7 9317 9317
## 2021_6_15_H8 9964 9302
## 2021_6_21_H10 11744 8315
## 2021_6_21_H12 13511 12582
## 2021_6_21_H9 10439 8404
## 2021_6_27_H21 9185 5980
## 2021_6_27_H22 19164 19164
## 2021_6_27_H27 6141 5137
## 2021_6_28_H25 6965 6695
## 2021_6_28_H26 3640 802
## 2021_6_28_H28 6997 3915
## 2021_6_29_H17 7297 7297
## 2021_6_29_H23 7563 5425
## 2021_6_29_H24 617 582
## 2021_6_4_H21 7094 6994
## 2021_6_4_H22 2999 2731
## 2021_6_4_H27 3177 3128
## 2021_6_5_H18 449 362
## 2021_6_5_H25 774 725
## 2021_6_5_H26 1760 1601
## 2021_6_6_H17 4725 3650
## 2021_6_6_H24 7054 6076
## 2021_6_7_H23 7382 6575
## 2021_7_14_H10 21714 11236
## 2021_7_14_H12 0 0
## 2021_7_20_H27 8309 6414
## 2021_7_21_H25 11212 11210
## 2021_7_21_H26 0 0
## 2021_7_6_H11 9544 8894
## 2021_7_6_H6 8352 5715
## 2021_7_7_H8 14781 14781
## 2021_7_8_H3 9647 9647
## 2023_6_12_H3 5634 5613
## 2023_6_12_H5 10982 10976
## 2023_6_12_H7 6500 6500
## 2023_6_13_H6 9514 7280
## 2023_6_13_H8 5040 4468
## 2023_6_13_H9 2599 2599
## 2023_6_14_H3 10736 10693
## 2023_6_14_H7 8631 8560
## 2023_6_14_H9 8873 8860
## 2023_6_16_H5 9508 5727
## 2023_6_24_H6 4954 1172
## 2023_6_24_H8 5793 1864
## 2023_6_25_H2 7663 4178
## 2023_6_25_H4 2442 878
## 2023_6_26_H1 2779 874
## 2023_6_26_H7 5443 2535
## 2023_6_27_H3 3938 1914
## 2023_6_27_H5 6623 4960
## 2023_6_8_H1 6601 6601
## 2023_6_8_H2 8098 8098
## 2023_6_8_H4 8210 8210
## 2023_6_9_H2 14406 8875
## 2023_6_9_H4 3123 3123
## 2023_7_15_H6 13934 2963
## 2023_7_16_H4 10299 1411
## 2023_7_17_H1 14924 3362
## 2023_7_18_H3 3232 1419
## 2023_7_18_H7 7201 873
## 2023_7_29_H5 6143 4478
## 2023_7_29_H7 14458 13571
## 2023_7_30_H8 8449 8443
## 2023_7_30_H9 2728 1957
## 2023_7_5_H1 5314 5250
## 2023_7_5_H2 2501 1793
## 2023_7_6_H6 8076 2387
## 2023_7_6_H8 12899 4945
## 2023_7_6_H9 15133 4645
## 2023_7_8_H3 5136 2405
## 2023_7_8_H5 11004 1748
## 2023_7_8_H7 16530 5152
## 2023_8_4_H2 13926 13845
## 2023_8_4_H5 6645 4455
## 2023_8_4_H6 10165 10109
## 2023_8_4_H7 10218 10168
## 2023_8_4_H8 10096 7856
## 2023_8_4_H9 148 69
## Ba001 10735 2683
## Ba002 5874 5843
## Ba003 7290 7290
## Bb001 12686 12686
## Bb002 17920 17920
## Bb003 20886 20886
## Bb004 16668 16568
## Bb005 9485 9485
## Bb007 9314 9314
## Bb008 5416 5416
## Bb009 6545 6545
## Bb010 8729 2199
## Bb011 4980 3914
## Bb012 20156 20109
## Bb013 5121 5097
## Bb014 12842 12842
## Bb015 5575 5575
## Bb016 5204 5204
## Bb017 5388 5379
## Bb018 8472 4683
## Bb019 16352 16131
## Bb020 19859 19846
## Bb021 5627 5131
## Bb022 14011 14011
## Bb023 5929 5929
## Bb024 14775 14725
## Bb025 8429 475
## Bf001 16047 16028
## Bf002 2 2
## Bf003 8362 8362
## Bf004 3827 3827
## Bg001 6409 312
## Bg002 6296 3172
## Bg003 7275 2451
## Bg004 7541 7480
## Bg005 9368 9286
## Bg006 13380 2003
## Bg007 12149 11883
## Bg008 9618 3687
## Bg009 3160 3040
## Bg010 4258 4077
## Bg011 8073 1652
## Bg012 6559 462
## Bg013 11934 11775
## Bg014 12426 12426
## Bg015 4976 4976
## Bg016 3391 2777
## Bg017 4909 437
## Bg018 4658 1161
## Bg019 7516 620
## Bi001 9685 912
## Bi002 11735 11612
## Bi003 16124 16031
## Bi004 16694 16615
## Bi005 149 149
## Bi006 3862 3862
## Bi007 5970 5970
## CKC0001 7745 7745
## ESE0004 7488 458
## ext_neg_ctrl_20230909 3 3
## ext_neg_ctrl_20231007 0 0
## ext_neg_ctrl_20231008 0 0
## ext_neg_ctrl_2024220A 0 0
## ext_neg_ctrl_2024220B 1733 1093
## ext_neg_ctrl_2024221A 2215 645
## ext_neg_ctrl_2024221B 451 170
## ext_neg_ctrl_2024222A 677 276
## ext_neg_ctrl_2024222B 5438 2672
## ext_neg_ctrl_2024312A 0 0
## ext_neg_ctrl_2024312B 0 0
## ext_neg_ctrl_2024314A 0 0
## ext_neg_ctrl_2024314B 0 0
## ext_neg_ctrl_2024319 175 175
## KLS0007 5097 5097
## KLS0027 4897 4897
## KLS0044 584 584
## KLS0045 14281 14281
## KLS0052 610 610
## KLS0054 978 978
## KLS0055 1337 1337
## KLS0071 9895 9895
## KLS0095 18048 18048
## KLS0096 10370 10370
## KLS0105 19096 19096
## KLS0106 10127 10127
## KLS0119 10030 10030
## KLS0134 9671 9671
## KLS0135 1679 1679
## KLS0136 7675 7675
## KLS0137 17127 17127
## KLS0138 10060 10060
## KLS0139 8704 8704
## KLS0150 6269 6269
## KLS0153 2572 2572
## KLS0155 13716 13716
## KLS0156 22161 22161
## KLS0159 15892 15892
## KLS0163 6614 6614
## KLS0165 6442 6442
## KLS0167 8730 8730
## KLS0168 9667 9667
## KLS0169 13848 13848
## KLS0170 8583 8583
## KLS0200 14417 14417
## KLS0201 20647 20647
## KLS0205 8580 8580
## KLS0209 2388 2388
## KLS0221 6793 6793
## KLS0224 10778 10778
## KLS0225 13061 13061
## KLS0227 9319 9319
## KLS0241 7979 7979
## KLS0244 9360 9360
## KLS0246 3233 3233
## KLS0248 15023 15023
## KLS0253 10099 10099
## KLS0254 7769 5585
## KLS0256 20145 6456
## KLS0259 13930 13930
## KLS0263 1330 1330
## KLS0266 659 659
## KLS0272 572 440
## pcr_rbcL_neg_crtl_20240417 1942 1430
## pcr_rbcL_neg_ctrl_20240409 455 455
## pcr_rbcL_neg_ctrl_20240418A 7333 3917
## pcr_rbcL_neg_ctrl_20240418B 41 41
## pcr_rbcL_neg_ctrl_20240523 0 0
## rbcL_pcr_neg_ctrl_20231021_20231119 3 3
## SCA0009 14600 14600
## SCA0010 11500 11500
## SCA0013 15651 15619
# Preview the first rows of the per-sample read-tracking table (reads remaining after each pipeline step)
head(track)
## input filtered denoisedF denoisedR merged nonchim nocontam
## 2020_6_16_H1 16617 9241 9210 9190 8965 8279 8177
## 2020_6_16_H5 9459 5929 5806 5833 4858 2607 2483
## 2020_6_16_H6 1296 850 842 831 761 624 624
## 2020_6_17_H2 6917 4559 4480 4452 2982 1457 1401
## 2020_6_17_H4 3027 2027 1988 1991 1830 1552 565
## 2020_6_17_H8 24212 15437 15362 15357 14791 12591 10665
# Convert the read-tracking table to a data frame so the dplyr verbs used below can operate on it
track <- as.data.frame(track)
library(tidyverse)
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse() masks Biostrings::collapse(), IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ purrr::compact() masks XVector::compact()
## ✖ purrr::compose() masks ShortRead::compose()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks GenomicAlignments::first(), S4Vectors::first()
## ✖ dplyr::id() masks ShortRead::id()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks GenomicAlignments::last()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks GenomicAlignments::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice() masks XVector::slice(), IRanges::slice()
## ✖ tibble::view() masks ShortRead::view()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Fraction of input reads lost by the end of the pipeline
# (from raw input to the final, decontaminated read count in `nocontam`)
track %>%
  mutate(loss = (input - nocontam) / input) %>%
  head()
## input filtered denoisedF denoisedR merged nonchim nocontam
## 2020_6_16_H1 16617 9241 9210 9190 8965 8279 8177
## 2020_6_16_H5 9459 5929 5806 5833 4858 2607 2483
## 2020_6_16_H6 1296 850 842 831 761 624 624
## 2020_6_17_H2 6917 4559 4480 4452 2982 1457 1401
## 2020_6_17_H4 3027 2027 1988 1991 1830 1552 565
## 2020_6_17_H8 24212 15437 15362 15357 14791 12591 10665
## loss
## 2020_6_16_H1 0.5079136
## 2020_6_16_H5 0.7374987
## 2020_6_16_H6 0.5185185
## 2020_6_17_H2 0.7974555
## 2020_6_17_H4 0.8133465
## 2020_6_17_H8 0.5595159
# Preview only the negative-control samples, i.e. rows whose sample name
# starts with "ext", "pcr", or "rbcL"
track %>%
  filter(
    str_starts(rownames(.), "ext") |
      str_starts(rownames(.), "pcr") |
      str_starts(rownames(.), "rbcL")
  ) %>%
  head()
## input filtered denoisedF denoisedR merged nonchim
## ext_neg_ctrl_20230909 7 3 3 3 3 3
## ext_neg_ctrl_20231007 6 3 1 1 0 0
## ext_neg_ctrl_20231008 12 1 1 1 0 0
## ext_neg_ctrl_2024220A 122 37 27 29 27 0
## ext_neg_ctrl_2024220B 3295 2165 2165 2160 2144 1733
## ext_neg_ctrl_2024221A 4432 2878 2876 2866 2821 2215
## nocontam
## ext_neg_ctrl_20230909 3
## ext_neg_ctrl_20231007 0
## ext_neg_ctrl_20231008 0
## ext_neg_ctrl_2024220A 0
## ext_neg_ctrl_2024220B 1093
## ext_neg_ctrl_2024221A 645
# Mean and sd of the read count at every pipeline step (plus the proportional loss),
# computed separately for negative controls (NegCtrl = 1) and unknown samples (NegCtrl = 0).
# The summary is rounded to 2 digits and transposed so each statistic prints as a row.
track %>%
  mutate(loss = (input - nocontam) / input) %>%
  group_by(
    NegCtrl = str_starts(rownames(.), "ext") |
      str_starts(rownames(.), "pcr") |
      str_starts(rownames(.), "rbcL")
  ) %>%
  summarize(across(input:loss, list(mean = mean, sd = sd), .names = "{.col}.{.fn}")) %>%
  round(digits = 2) %>%
  t()
## [,1] [,2]
## NegCtrl 0.00 1.00
## input.mean 16831.54 1909.25
## input.sd 9406.71 3650.45
## filtered.mean 10900.73 1244.35
## filtered.sd 6188.45 2388.04
## denoisedF.mean 10826.32 1241.30
## denoisedF.sd 6163.43 2383.87
## denoisedR.mean 10809.44 1237.70
## denoisedR.sd 6158.79 2378.23
## merged.mean 10215.85 1199.10
## merged.sd 5968.33 2274.75
## nonchim.mean 8306.65 1023.30
## nonchim.sd 5133.02 1986.58
## nocontam.mean 6864.17 544.00
## nocontam.sd 5099.99 1040.34
## loss.mean 0.61 0.76
## loss.sd 0.18 0.25
# Detach tidyverse to avoid conflicts; it is reloaded later for the plots made after taxonomic assignment.
# Detaching "package:tidyverse" alone only removes the meta-package: the core packages it attached
# (dplyr, purrr, tidyr, ...) stay on the search path and keep masking the Bioconductor functions.
# So detach the meta-package first, then each core package that is currently attached.
invisible(lapply(
  c("tidyverse", "lubridate", "forcats", "stringr", "dplyr", "purrr",
    "readr", "tidyr", "tibble", "ggplot2"),
  function(pkg) {
    pkg_env <- paste0("package:", pkg)
    if (pkg_env %in% search()) {
      detach(pkg_env, character.only = TRUE)
    }
  }
))
The DADA2 package provides a native implementation of the naive Bayesian classifier method for taxonomic assignment. The assignTaxonomy function takes as input a set of sequences to be classified and a training set of reference sequences with known taxonomy, and outputs taxonomic assignments with at least minBoot bootstrap confidence.
RBCL Database: Bell, Karen (2021). rbcL July 2021. figshare. Dataset. https://doi.org/10.6084/m9.figshare.14936007.v1
“We downloaded all available seed plant rbcL sequences (as of 27 January 2016) from NCBI, using the following search: (rbcL[Gene Name] AND 50:400000000[Sequence Length]) AND "seed plants"[porgn:_txid58024]. This included sequences that were predominantly rbcL, sequences with a small fragment of rbcL sequence along with a longer sequence of intergenic spacer, and complete ptDNA genomes.” (Bell et al 2017)
# Paths to the two DADA2-formatted rbcL reference FASTA files
# (the ".species" file and the taxonomy file passed to assignTaxonomy() below)
rbcL.ref.spp <- "/scratch/kls7sg/Bioinformatics/ReferenceDatabases/rbcL-KarenBell_2021.07.08/rbcL_plus_Nov2019adds_Jul2021corrections.dada2.species.fa"
rbcL.ref.tax <- "/scratch/kls7sg/Bioinformatics/ReferenceDatabases/rbcL-KarenBell_2021.07.08/rbcL_plus_Nov2019adds_Jul2021corrections.dada2.fa"
# start 3:08 pm, finished like 4:41pm (locally)
# Print the current time, then record the start time for the taxonomy assignment
Sys.time()
t1 <- Sys.time()
## [1] "2024-11-06 17:44:56 EST"
# Classify every ASV sequence in the decontaminated sequence table against the rbcL reference
taxa.rbcl <- assignTaxonomy(
  seqs = getSequences(seqtab.nochim.nocontam),
  refFasta = rbcL.ref.tax,
  multithread = TRUE
)
## Warning in .Call2("fasta_index", filexp_list, nrec, skip, seek.first.rec, :
## reading FASTA file
## /scratch/kls7sg/Bioinformatics/ReferenceDatabases/rbcL-KarenBell_2021.07.08/rbcL_plus_Nov2019adds_Jul2021corrections.dada2.fa:
## ignored 24379 invalid one-letter sequence codes
# Print the current time, then record the end time of the taxonomy assignment
Sys.time()
t2 <- Sys.time()
## [1] "2024-11-06 17:45:37 EST"
# If your reference file is in the incorrect format for assignTaxonomy, check out this webpage: https://benjjneb.github.io/dada2/training.html
# Elapsed time of the assignTaxonomy() call
difftime(t2, t1)
## Time difference of 41.67148 secs
#Time difference of 36.02247 secs (on Rivanna with 24 cores and multithread=TRUE)
#Warning message:
#In .Call2("fasta_index", filexp_list, nrec, skip, seek.first.rec, :
# reading FASTA file /scratch/kls7sg/Bioinformatics/rbcL-KarenBell-2021.07.08/rbcL_plus_Nov2019adds_Jul2021corrections.dada2.fa: ignored 24379 invalid one-letter sequence codes
#This message indicates that you have non-ACGT characters in your custom taxonomy reference file. Is that file appropriately formatted? It's possible this could interfere with your results if the non-ACGT characters are needed to represent important taxa.
# Work on a copy with the (very long) sequence row names removed, for display only;
# taxa.rbcl itself keeps its row names
taxa.rbcl.print <- taxa.rbcl
rownames(taxa.rbcl.print) <- NULL
head(taxa.rbcl.print)
## Kingdom Phylum Class
## [1,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__rosids_71275"
## [2,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__rosids_71275"
## [3,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__asterids_71274"
## [4,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__rosids_71275"
## [5,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__asterids_71274"
## [6,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__asterids_71274"
## Order Family Genus
## [1,] "o__Fabales_72025" "f__Fabaceae_3803" "g__Trifolium_3898"
## [2,] "o__Fabales_72025" "f__Fabaceae_3803" "g__Cercis_49800"
## [3,] "o__Lamiales_4143" "f__Plantaginaceae_156152" "g__Plantago_26867"
## [4,] "o__Malpighiales_3646" "f__Salicaceae_3688" "g__Salix_40685"
## [5,] "o__Lamiales_4143" "f__Lamiaceae_4136" "g__Lamium_53158"
## [6,] "o__Dipsacales_4199" "f__Caprifoliaceae_4200" "g__Lonicera_49606"
## Species
## [1,] "s__Trifolium repens_3899"
## [2,] "s__Cercis siliquastrum_49802"
## [3,] "s__Plantago lanceolata_39414"
## [4,] NA
## [5,] "s__Lamium purpureum_53164"
## [6,] "s__Lonicera maackii_51255"
# Sample sequence table, transposed so that each ASV sequence is a row and each sample a column
rbcl.seq <- as.data.frame(t(seqtab.nochim.nocontam))
# Assigned taxonomy, one row per ASV sequence
rbcl.taxa <- as.data.frame(taxa.rbcl)
# Do the ASV sequences appear in the same order in both tables? (must be TRUE before binding them)
identical(rownames(rbcl.seq), rownames(rbcl.taxa))
## [1] TRUE
# match() returns, for each element of the first argument, the index of its match in the second;
# when both vectors are identical the result is simply 1, 2, ..., n
match(rownames(rbcl.seq), rownames(rbcl.taxa))
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
# bind sample sequence table with assigned sequence taxonomy
# cbind() pairs rows purely by position (it does not align on row names), so stop here
# if the ASV sequences of the two tables are not in exactly the same order
stopifnot(identical(rownames(rbcl.seq), rownames(rbcl.taxa)))
rbcL.IDs <- cbind(rbcl.seq, rbcl.taxa)
# rownames(rbcL.IDs) <- NULL #remove ASV row names (skipping this for now)
# rename samples (remove "rbcL_"): first list the current column names (sample columns followed by the taxonomy columns)
names(rbcL.IDs)
## [1] "rbcL_2020_6_16_H1"
## [2] "rbcL_2020_6_16_H5"
## [3] "rbcL_2020_6_16_H6"
## [4] "rbcL_2020_6_17_H2"
## [5] "rbcL_2020_6_17_H4"
## [6] "rbcL_2020_6_17_H8"
## [7] "rbcL_2020_6_18_H3"
## [8] "rbcL_2020_6_18_H7"
## [9] "rbcL_2020_6_18_H9"
## [10] "rbcL_2020_6_3_H1"
## [11] "rbcL_2020_6_3_H5"
## [12] "rbcL_2020_6_3_H6"
## [13] "rbcL_2020_6_30_H1"
## [14] "rbcL_2020_6_30_H5"
## [15] "rbcL_2020_6_30_H6"
## [16] "rbcL_2020_6_4_H2"
## [17] "rbcL_2020_6_4_H4"
## [18] "rbcL_2020_6_4_H8"
## [19] "rbcL_2020_6_5_H3"
## [20] "rbcL_2020_6_5_H7"
## [21] "rbcL_2020_6_5_H9"
## [22] "rbcL_2020_7_1_H2"
## [23] "rbcL_2020_7_1_H4"
## [24] "rbcL_2020_7_1_H8"
## [25] "rbcL_2020_7_14_H5"
## [26] "rbcL_2020_7_14_H6"
## [27] "rbcL_2020_7_15_H8"
## [28] "rbcL_2020_7_16_H3"
## [29] "rbcL_2020_7_16_H7"
## [30] "rbcL_2020_7_16_H9"
## [31] "rbcL_2020_7_2_H3"
## [32] "rbcL_2020_7_2_H7"
## [33] "rbcL_2020_7_2_H9"
## [34] "rbcL_2021_6_13_H1"
## [35] "rbcL_2021_6_13_H3"
## [36] "rbcL_2021_6_14_H11"
## [37] "rbcL_2021_6_14_H6"
## [38] "rbcL_2021_6_14_H7"
## [39] "rbcL_2021_6_15_H8"
## [40] "rbcL_2021_6_21_H10"
## [41] "rbcL_2021_6_21_H12"
## [42] "rbcL_2021_6_21_H9"
## [43] "rbcL_2021_6_27_H21"
## [44] "rbcL_2021_6_27_H22"
## [45] "rbcL_2021_6_27_H27"
## [46] "rbcL_2021_6_28_H25"
## [47] "rbcL_2021_6_28_H26"
## [48] "rbcL_2021_6_28_H28"
## [49] "rbcL_2021_6_29_H17"
## [50] "rbcL_2021_6_29_H23"
## [51] "rbcL_2021_6_29_H24"
## [52] "rbcL_2021_6_4_H21"
## [53] "rbcL_2021_6_4_H22"
## [54] "rbcL_2021_6_4_H27"
## [55] "rbcL_2021_6_5_H18"
## [56] "rbcL_2021_6_5_H25"
## [57] "rbcL_2021_6_5_H26"
## [58] "rbcL_2021_6_6_H17"
## [59] "rbcL_2021_6_6_H24"
## [60] "rbcL_2021_6_7_H23"
## [61] "rbcL_2021_7_14_H10"
## [62] "rbcL_2021_7_14_H12"
## [63] "rbcL_2021_7_20_H27"
## [64] "rbcL_2021_7_21_H25"
## [65] "rbcL_2021_7_21_H26"
## [66] "rbcL_2021_7_6_H11"
## [67] "rbcL_2021_7_6_H6"
## [68] "rbcL_2021_7_7_H8"
## [69] "rbcL_2021_7_8_H3"
## [70] "rbcL_2023_6_12_H3"
## [71] "rbcL_2023_6_12_H5"
## [72] "rbcL_2023_6_12_H7"
## [73] "rbcL_2023_6_13_H6"
## [74] "rbcL_2023_6_13_H8"
## [75] "rbcL_2023_6_13_H9"
## [76] "rbcL_2023_6_14_H3"
## [77] "rbcL_2023_6_14_H7"
## [78] "rbcL_2023_6_14_H9"
## [79] "rbcL_2023_6_16_H5"
## [80] "rbcL_2023_6_24_H6"
## [81] "rbcL_2023_6_24_H8"
## [82] "rbcL_2023_6_25_H2"
## [83] "rbcL_2023_6_25_H4"
## [84] "rbcL_2023_6_26_H1"
## [85] "rbcL_2023_6_26_H7"
## [86] "rbcL_2023_6_27_H3"
## [87] "rbcL_2023_6_27_H5"
## [88] "rbcL_2023_6_8_H1"
## [89] "rbcL_2023_6_8_H2"
## [90] "rbcL_2023_6_8_H4"
## [91] "rbcL_2023_6_9_H2"
## [92] "rbcL_2023_6_9_H4"
## [93] "rbcL_2023_7_15_H6"
## [94] "rbcL_2023_7_16_H4"
## [95] "rbcL_2023_7_17_H1"
## [96] "rbcL_2023_7_18_H3"
## [97] "rbcL_2023_7_18_H7"
## [98] "rbcL_2023_7_29_H5"
## [99] "rbcL_2023_7_29_H7"
## [100] "rbcL_2023_7_30_H8"
## [101] "rbcL_2023_7_30_H9"
## [102] "rbcL_2023_7_5_H1"
## [103] "rbcL_2023_7_5_H2"
## [104] "rbcL_2023_7_6_H6"
## [105] "rbcL_2023_7_6_H8"
## [106] "rbcL_2023_7_6_H9"
## [107] "rbcL_2023_7_8_H3"
## [108] "rbcL_2023_7_8_H5"
## [109] "rbcL_2023_7_8_H7"
## [110] "rbcL_2023_8_4_H2"
## [111] "rbcL_2023_8_4_H5"
## [112] "rbcL_2023_8_4_H6"
## [113] "rbcL_2023_8_4_H7"
## [114] "rbcL_2023_8_4_H8"
## [115] "rbcL_2023_8_4_H9"
## [116] "rbcL_Ba001"
## [117] "rbcL_Ba002"
## [118] "rbcL_Ba003"
## [119] "rbcL_Bb001"
## [120] "rbcL_Bb002"
## [121] "rbcL_Bb003"
## [122] "rbcL_Bb004"
## [123] "rbcL_Bb005"
## [124] "rbcL_Bb007"
## [125] "rbcL_Bb008"
## [126] "rbcL_Bb009"
## [127] "rbcL_Bb010"
## [128] "rbcL_Bb011"
## [129] "rbcL_Bb012"
## [130] "rbcL_Bb013"
## [131] "rbcL_Bb014"
## [132] "rbcL_Bb015"
## [133] "rbcL_Bb016"
## [134] "rbcL_Bb017"
## [135] "rbcL_Bb018"
## [136] "rbcL_Bb019"
## [137] "rbcL_Bb020"
## [138] "rbcL_Bb021"
## [139] "rbcL_Bb022"
## [140] "rbcL_Bb023"
## [141] "rbcL_Bb024"
## [142] "rbcL_Bb025"
## [143] "rbcL_Bf001"
## [144] "rbcL_Bf002"
## [145] "rbcL_Bf003"
## [146] "rbcL_Bf004"
## [147] "rbcL_Bg001"
## [148] "rbcL_Bg002"
## [149] "rbcL_Bg003"
## [150] "rbcL_Bg004"
## [151] "rbcL_Bg005"
## [152] "rbcL_Bg006"
## [153] "rbcL_Bg007"
## [154] "rbcL_Bg008"
## [155] "rbcL_Bg009"
## [156] "rbcL_Bg010"
## [157] "rbcL_Bg011"
## [158] "rbcL_Bg012"
## [159] "rbcL_Bg013"
## [160] "rbcL_Bg014"
## [161] "rbcL_Bg015"
## [162] "rbcL_Bg016"
## [163] "rbcL_Bg017"
## [164] "rbcL_Bg018"
## [165] "rbcL_Bg019"
## [166] "rbcL_Bi001"
## [167] "rbcL_Bi002"
## [168] "rbcL_Bi003"
## [169] "rbcL_Bi004"
## [170] "rbcL_Bi005"
## [171] "rbcL_Bi006"
## [172] "rbcL_Bi007"
## [173] "rbcL_CKC0001"
## [174] "rbcL_ESE0004"
## [175] "rbcL_ext_neg_ctrl_20230909"
## [176] "rbcL_ext_neg_ctrl_20231007"
## [177] "rbcL_ext_neg_ctrl_20231008"
## [178] "rbcL_ext_neg_ctrl_2024220A"
## [179] "rbcL_ext_neg_ctrl_2024220B"
## [180] "rbcL_ext_neg_ctrl_2024221A"
## [181] "rbcL_ext_neg_ctrl_2024221B"
## [182] "rbcL_ext_neg_ctrl_2024222A"
## [183] "rbcL_ext_neg_ctrl_2024222B"
## [184] "rbcL_ext_neg_ctrl_2024312A"
## [185] "rbcL_ext_neg_ctrl_2024312B"
## [186] "rbcL_ext_neg_ctrl_2024314A"
## [187] "rbcL_ext_neg_ctrl_2024314B"
## [188] "rbcL_ext_neg_ctrl_2024319"
## [189] "rbcL_KLS0007"
## [190] "rbcL_KLS0027"
## [191] "rbcL_KLS0044"
## [192] "rbcL_KLS0045"
## [193] "rbcL_KLS0052"
## [194] "rbcL_KLS0054"
## [195] "rbcL_KLS0055"
## [196] "rbcL_KLS0071"
## [197] "rbcL_KLS0095"
## [198] "rbcL_KLS0096"
## [199] "rbcL_KLS0105"
## [200] "rbcL_KLS0106"
## [201] "rbcL_KLS0119"
## [202] "rbcL_KLS0134"
## [203] "rbcL_KLS0135"
## [204] "rbcL_KLS0136"
## [205] "rbcL_KLS0137"
## [206] "rbcL_KLS0138"
## [207] "rbcL_KLS0139"
## [208] "rbcL_KLS0150"
## [209] "rbcL_KLS0153"
## [210] "rbcL_KLS0155"
## [211] "rbcL_KLS0156"
## [212] "rbcL_KLS0159"
## [213] "rbcL_KLS0163"
## [214] "rbcL_KLS0165"
## [215] "rbcL_KLS0167"
## [216] "rbcL_KLS0168"
## [217] "rbcL_KLS0169"
## [218] "rbcL_KLS0170"
## [219] "rbcL_KLS0200"
## [220] "rbcL_KLS0201"
## [221] "rbcL_KLS0205"
## [222] "rbcL_KLS0209"
## [223] "rbcL_KLS0221"
## [224] "rbcL_KLS0224"
## [225] "rbcL_KLS0225"
## [226] "rbcL_KLS0227"
## [227] "rbcL_KLS0241"
## [228] "rbcL_KLS0244"
## [229] "rbcL_KLS0246"
## [230] "rbcL_KLS0248"
## [231] "rbcL_KLS0253"
## [232] "rbcL_KLS0254"
## [233] "rbcL_KLS0256"
## [234] "rbcL_KLS0259"
## [235] "rbcL_KLS0263"
## [236] "rbcL_KLS0266"
## [237] "rbcL_KLS0272"
## [238] "rbcL_pcr_rbcL_neg_crtl_20240417"
## [239] "rbcL_pcr_rbcL_neg_ctrl_20240409"
## [240] "rbcL_pcr_rbcL_neg_ctrl_20240418A"
## [241] "rbcL_pcr_rbcL_neg_ctrl_20240418B"
## [242] "rbcL_pcr_rbcL_neg_ctrl_20240523"
## [243] "rbcL_rbcL_pcr_neg_ctrl_20231021_20231119"
## [244] "rbcL_SCA0009"
## [245] "rbcL_SCA0010"
## [246] "rbcL_SCA0013"
## [247] "Kingdom"
## [248] "Phylum"
## [249] "Class"
## [250] "Order"
## [251] "Family"
## [252] "Genus"
## [253] "Species"
# Strip a leading "rbcL_" from every column name (only the first match at the start of the name is removed),
# then list the names again to confirm
names(rbcL.IDs) <- sub(pattern = "^rbcL_", replacement = "", x = names(rbcL.IDs))
names(rbcL.IDs)
## [1] "2020_6_16_H1"
## [2] "2020_6_16_H5"
## [3] "2020_6_16_H6"
## [4] "2020_6_17_H2"
## [5] "2020_6_17_H4"
## [6] "2020_6_17_H8"
## [7] "2020_6_18_H3"
## [8] "2020_6_18_H7"
## [9] "2020_6_18_H9"
## [10] "2020_6_3_H1"
## [11] "2020_6_3_H5"
## [12] "2020_6_3_H6"
## [13] "2020_6_30_H1"
## [14] "2020_6_30_H5"
## [15] "2020_6_30_H6"
## [16] "2020_6_4_H2"
## [17] "2020_6_4_H4"
## [18] "2020_6_4_H8"
## [19] "2020_6_5_H3"
## [20] "2020_6_5_H7"
## [21] "2020_6_5_H9"
## [22] "2020_7_1_H2"
## [23] "2020_7_1_H4"
## [24] "2020_7_1_H8"
## [25] "2020_7_14_H5"
## [26] "2020_7_14_H6"
## [27] "2020_7_15_H8"
## [28] "2020_7_16_H3"
## [29] "2020_7_16_H7"
## [30] "2020_7_16_H9"
## [31] "2020_7_2_H3"
## [32] "2020_7_2_H7"
## [33] "2020_7_2_H9"
## [34] "2021_6_13_H1"
## [35] "2021_6_13_H3"
## [36] "2021_6_14_H11"
## [37] "2021_6_14_H6"
## [38] "2021_6_14_H7"
## [39] "2021_6_15_H8"
## [40] "2021_6_21_H10"
## [41] "2021_6_21_H12"
## [42] "2021_6_21_H9"
## [43] "2021_6_27_H21"
## [44] "2021_6_27_H22"
## [45] "2021_6_27_H27"
## [46] "2021_6_28_H25"
## [47] "2021_6_28_H26"
## [48] "2021_6_28_H28"
## [49] "2021_6_29_H17"
## [50] "2021_6_29_H23"
## [51] "2021_6_29_H24"
## [52] "2021_6_4_H21"
## [53] "2021_6_4_H22"
## [54] "2021_6_4_H27"
## [55] "2021_6_5_H18"
## [56] "2021_6_5_H25"
## [57] "2021_6_5_H26"
## [58] "2021_6_6_H17"
## [59] "2021_6_6_H24"
## [60] "2021_6_7_H23"
## [61] "2021_7_14_H10"
## [62] "2021_7_14_H12"
## [63] "2021_7_20_H27"
## [64] "2021_7_21_H25"
## [65] "2021_7_21_H26"
## [66] "2021_7_6_H11"
## [67] "2021_7_6_H6"
## [68] "2021_7_7_H8"
## [69] "2021_7_8_H3"
## [70] "2023_6_12_H3"
## [71] "2023_6_12_H5"
## [72] "2023_6_12_H7"
## [73] "2023_6_13_H6"
## [74] "2023_6_13_H8"
## [75] "2023_6_13_H9"
## [76] "2023_6_14_H3"
## [77] "2023_6_14_H7"
## [78] "2023_6_14_H9"
## [79] "2023_6_16_H5"
## [80] "2023_6_24_H6"
## [81] "2023_6_24_H8"
## [82] "2023_6_25_H2"
## [83] "2023_6_25_H4"
## [84] "2023_6_26_H1"
## [85] "2023_6_26_H7"
## [86] "2023_6_27_H3"
## [87] "2023_6_27_H5"
## [88] "2023_6_8_H1"
## [89] "2023_6_8_H2"
## [90] "2023_6_8_H4"
## [91] "2023_6_9_H2"
## [92] "2023_6_9_H4"
## [93] "2023_7_15_H6"
## [94] "2023_7_16_H4"
## [95] "2023_7_17_H1"
## [96] "2023_7_18_H3"
## [97] "2023_7_18_H7"
## [98] "2023_7_29_H5"
## [99] "2023_7_29_H7"
## [100] "2023_7_30_H8"
## [101] "2023_7_30_H9"
## [102] "2023_7_5_H1"
## [103] "2023_7_5_H2"
## [104] "2023_7_6_H6"
## [105] "2023_7_6_H8"
## [106] "2023_7_6_H9"
## [107] "2023_7_8_H3"
## [108] "2023_7_8_H5"
## [109] "2023_7_8_H7"
## [110] "2023_8_4_H2"
## [111] "2023_8_4_H5"
## [112] "2023_8_4_H6"
## [113] "2023_8_4_H7"
## [114] "2023_8_4_H8"
## [115] "2023_8_4_H9"
## [116] "Ba001"
## [117] "Ba002"
## [118] "Ba003"
## [119] "Bb001"
## [120] "Bb002"
## [121] "Bb003"
## [122] "Bb004"
## [123] "Bb005"
## [124] "Bb007"
## [125] "Bb008"
## [126] "Bb009"
## [127] "Bb010"
## [128] "Bb011"
## [129] "Bb012"
## [130] "Bb013"
## [131] "Bb014"
## [132] "Bb015"
## [133] "Bb016"
## [134] "Bb017"
## [135] "Bb018"
## [136] "Bb019"
## [137] "Bb020"
## [138] "Bb021"
## [139] "Bb022"
## [140] "Bb023"
## [141] "Bb024"
## [142] "Bb025"
## [143] "Bf001"
## [144] "Bf002"
## [145] "Bf003"
## [146] "Bf004"
## [147] "Bg001"
## [148] "Bg002"
## [149] "Bg003"
## [150] "Bg004"
## [151] "Bg005"
## [152] "Bg006"
## [153] "Bg007"
## [154] "Bg008"
## [155] "Bg009"
## [156] "Bg010"
## [157] "Bg011"
## [158] "Bg012"
## [159] "Bg013"
## [160] "Bg014"
## [161] "Bg015"
## [162] "Bg016"
## [163] "Bg017"
## [164] "Bg018"
## [165] "Bg019"
## [166] "Bi001"
## [167] "Bi002"
## [168] "Bi003"
## [169] "Bi004"
## [170] "Bi005"
## [171] "Bi006"
## [172] "Bi007"
## [173] "CKC0001"
## [174] "ESE0004"
## [175] "ext_neg_ctrl_20230909"
## [176] "ext_neg_ctrl_20231007"
## [177] "ext_neg_ctrl_20231008"
## [178] "ext_neg_ctrl_2024220A"
## [179] "ext_neg_ctrl_2024220B"
## [180] "ext_neg_ctrl_2024221A"
## [181] "ext_neg_ctrl_2024221B"
## [182] "ext_neg_ctrl_2024222A"
## [183] "ext_neg_ctrl_2024222B"
## [184] "ext_neg_ctrl_2024312A"
## [185] "ext_neg_ctrl_2024312B"
## [186] "ext_neg_ctrl_2024314A"
## [187] "ext_neg_ctrl_2024314B"
## [188] "ext_neg_ctrl_2024319"
## [189] "KLS0007"
## [190] "KLS0027"
## [191] "KLS0044"
## [192] "KLS0045"
## [193] "KLS0052"
## [194] "KLS0054"
## [195] "KLS0055"
## [196] "KLS0071"
## [197] "KLS0095"
## [198] "KLS0096"
## [199] "KLS0105"
## [200] "KLS0106"
## [201] "KLS0119"
## [202] "KLS0134"
## [203] "KLS0135"
## [204] "KLS0136"
## [205] "KLS0137"
## [206] "KLS0138"
## [207] "KLS0139"
## [208] "KLS0150"
## [209] "KLS0153"
## [210] "KLS0155"
## [211] "KLS0156"
## [212] "KLS0159"
## [213] "KLS0163"
## [214] "KLS0165"
## [215] "KLS0167"
## [216] "KLS0168"
## [217] "KLS0169"
## [218] "KLS0170"
## [219] "KLS0200"
## [220] "KLS0201"
## [221] "KLS0205"
## [222] "KLS0209"
## [223] "KLS0221"
## [224] "KLS0224"
## [225] "KLS0225"
## [226] "KLS0227"
## [227] "KLS0241"
## [228] "KLS0244"
## [229] "KLS0246"
## [230] "KLS0248"
## [231] "KLS0253"
## [232] "KLS0254"
## [233] "KLS0256"
## [234] "KLS0259"
## [235] "KLS0263"
## [236] "KLS0266"
## [237] "KLS0272"
## [238] "pcr_rbcL_neg_crtl_20240417"
## [239] "pcr_rbcL_neg_ctrl_20240409"
## [240] "pcr_rbcL_neg_ctrl_20240418A"
## [241] "pcr_rbcL_neg_ctrl_20240418B"
## [242] "pcr_rbcL_neg_ctrl_20240523"
## [243] "rbcL_pcr_neg_ctrl_20231021_20231119"
## [244] "SCA0009"
## [245] "SCA0010"
## [246] "SCA0013"
## [247] "Kingdom"
## [248] "Phylum"
## [249] "Class"
## [250] "Order"
## [251] "Family"
## [252] "Genus"
## [253] "Species"
library(tidyverse)
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse() masks Biostrings::collapse(), IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ purrr::compact() masks XVector::compact()
## ✖ purrr::compose() masks ShortRead::compose()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks GenomicAlignments::first(), S4Vectors::first()
## ✖ dplyr::id() masks ShortRead::id()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks GenomicAlignments::last()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks GenomicAlignments::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice() masks XVector::slice(), IRanges::slice()
## ✖ tibble::view() masks ShortRead::view()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Transpose rbcL.IDs (ASV table by sample) into long format after calculating total reads for each ASV (each row in rbcL.IDs). After transposing, calculate total reads for each Sample
# Total reads per ASV (row sum across all sample columns).
# Sample columns are selected by type (the read counts are the numeric columns; the taxonomy
# columns are character) instead of by a hard-coded first:last sample name range, so the line
# keeps working when samples are added, dropped, or reordered. An existing ASVTotalReads
# column is excluded so re-running the line does not double count.
rbcL.IDs <- rbcL.IDs %>%
  mutate(ASVTotalReads = rowSums(select(., where(is.numeric), -any_of("ASVTotalReads"))))
# Long format: one row per ASV x sample combination with at least one read,
# plus the total reads of that sample (SampleTotalReads).
# The result is left grouped by Sample.
# NOTE(review): the ASV sequences are stored only as row names of rbcL.IDs, and pivot_longer()
# does not carry row names over — confirm the long table does not need an ASV identifier.
rbcL.IDs.long <- rbcL.IDs %>%
  pivot_longer(
    cols = c(where(is.numeric), -ASVTotalReads),
    names_to = "Sample",
    values_to = "Reads"
  ) %>%
  filter(Reads > 0) %>%
  group_by(Sample) %>%
  mutate(SampleTotalReads = sum(Reads))
#rbcL.IDs<-partial_join(rbcL.IDs,___sample-info-data___,"Sample", "SampleName") #join sample info if you want/have it
1,928,317 reads across all samples (1,898,063 reads after removing low-abundance ASVs) ((1,562,182 reads after also removing contaminant sequences)). 237 samples with 1+ reads (232 after removing low-abundance ASVs). 1 to 28,062 reads per sample (mean = 8.1K).
# Total reads across all samples:
#   1,928,317 before removing ASVs with less than 100 total reads
#   1,898,063 after removing low-abundance ASVs
#   1,562,182 after also removing contaminant sequences
rbcL.IDs %>%
  pull(ASVTotalReads) %>%
  sum()
## [1] 1562182
# Number of sample columns in the wide table: 246 samples
# (this count includes samples with 0 reads; 14 of them in this run)
rbcL.IDs %>%
  select(where(is.numeric), -ASVTotalReads) %>%
  colnames() %>%
  n_distinct()
## [1] 246
# Number of samples with at least one read:
#   237 samples before removing ASVs with less than 100 total reads
#   232 samples after removing low-abundance ASVs
rbcL.IDs.long %>%
  pull(Sample) %>%
  n_distinct()
## [1] 232
# Total reads per sample in rbcL.IDs (basically an ASV table by sample with taxonomic ids),
# stored as a one-column data frame whose row names are the sample names
temp <- rbcL.IDs %>%
  select(where(is.numeric), -ASVTotalReads) %>%
  colSums() %>%
  as.data.frame()
colnames(temp) <- "TotalReads"
# Show only the samples with 0 reads; these disappear from rbcL.IDs.long,
# because the long table keeps only rows with Reads > 0
temp %>%
  filter(TotalReads == 0)
## TotalReads
## 2020_7_1_H4 0
## 2020_7_14_H5 0
## 2020_7_14_H6 0
## 2020_7_16_H9 0
## 2021_7_14_H12 0
## 2021_7_21_H26 0
## ext_neg_ctrl_20231007 0
## ext_neg_ctrl_20231008 0
## ext_neg_ctrl_2024220A 0
## ext_neg_ctrl_2024312A 0
## ext_neg_ctrl_2024312B 0
## ext_neg_ctrl_2024314A 0
## ext_neg_ctrl_2024314B 0
## pcr_rbcL_neg_ctrl_20240523 0
# Remove the temporary per-sample totals table from the workspace
rm(temp)
# Distribution of total reads per sample: histogram first, then the numeric summary
# (see the summary output for the exact minimum and maximum)
hist(
  rbcL.IDs.long %>%
    group_by(Sample) %>%
    summarise(SumReads = sum(Reads)) %>%
    pull(SumReads),
  xlab = "SampleReads",
  main = NULL
)
rbcL.IDs.long %>%
  group_by(Sample) %>%
  summarise(SumReads = sum(Reads)) %>%
  select(-Sample) %>%
  summary()
## SumReads
## Min. : 2
## 1st Qu.: 2475
## Median : 5975
## Mean : 6734
## 3rd Qu.: 9668
## Max. :22161
# Reads per sample for the __negative control samples__ (sample name starts with "ext", "pcr", or "rbcL");
# bar fill indicates whether the sample has fewer than 2K reads
rbcL.IDs.long %>%
  filter(
    str_starts(Sample, "ext") |
      str_starts(Sample, "pcr") |
      str_starts(Sample, "rbcL")
  ) %>%
  group_by(Sample) %>%
  summarise(SumReads = sum(Reads)) %>%
  ggplot(aes(x = Sample, y = SumReads, fill = SumReads < 2000)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
# Reads per sample for the __unknown samples__ (everything that is not a negative control);
# bar fill indicates whether the sample has more than 2K reads
rbcL.IDs.long %>%
  filter(!(
    str_starts(Sample, "ext") |
      str_starts(Sample, "pcr") |
      str_starts(Sample, "rbcL")
  )) %>%
  group_by(Sample) %>%
  summarise(SumReads = sum(Reads)) %>%
  ggplot(aes(x = Sample, y = SumReads, fill = SumReads > 2000)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
# Histogram of reads per sample for the __unknown samples__ (bins of 1000 reads)
rbcL.IDs.long %>%
  filter(!(
    str_starts(Sample, "ext") |
      str_starts(Sample, "pcr") |
      str_starts(Sample, "rbcL")
  )) %>%
  group_by(Sample) %>%
  summarise(SumReads = sum(Reads)) %>%
  ggplot(aes(SumReads)) +
  geom_histogram(binwidth = 1000, color = "black")
# Histogram of reads per sample for the __negative controls__ (bins of 250 reads)
rbcL.IDs.long %>%
  filter(
    str_starts(Sample, "ext") |
      str_starts(Sample, "pcr") |
      str_starts(Sample, "rbcL")
  ) %>%
  group_by(Sample) %>%
  summarise(SumReads = sum(Reads)) %>%
  ggplot(aes(SumReads)) +
  geom_histogram(binwidth = 250, color = "black")
952 total ASVs across all samples (260 ASVs after removing low-abund ASVs) ((251 ASVs after also removing contaminant sequences)). 432 of all ASVs were not assigned to species ((69 of all ASVs were not assigned to species after removing low-abund and contaminating sequences)).
# Number of ASVs (rows) in the table:
#   952 total ASVs across all samples before removing low-abund ASVs
#   (260 ASVs after removing low-abund ASVs)
#   ((251 ASVs after also removing contaminant sequences))
nrow(rbcL.IDs)
## [1] 251
# (numbers in parentheses below reflect totals after low-abund ASV removal)
# ASVs not assigned to family: 149 (4) of all ASVs
rbcL.IDs %>%
  summarize(n = sum(is.na(Family)))
## n
## 1 3
# ASVs not assigned to genus: 242 (31) ((30)) of all ASVs
rbcL.IDs %>%
  summarize(n = sum(is.na(Genus)))
## n
## 1 28
# ASVs not assigned to species: 432 (69) of all ASVs
rbcL.IDs %>%
  summarize(n = sum(is.na(Species)))
## n
## 1 72
Most ASVs have very few total reads. There are a handful of common species assigned to most ASVs, but also a lot of reads whose ASV could not be assigned.
# Histogram of total reads per ASV: most ASVs have very few total reads
rbcL.IDs %>%
  ggplot(aes(x = ASVTotalReads)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
There are a handful of common species assigned to most ASVs, but also a lot of reads whose ASV could not be assigned
# Total reads per assigned species (ASVs without a species assignment are drawn under NA)
rbcL.IDs %>%
  ggplot(aes(x = Species, y = ASVTotalReads)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
Overall, about 80% of reads were assigned to species
# Total reads belonging to ASVs that were not assigned to species
# (329,977 reads unassigned to species out of 1,928,317 total reads across all samples, before any ASV removal)
rbcL.IDs %>%
  summarize(sum = sum(ASVTotalReads[is.na(Species)]))
## sum
## 1 295661
#after removing low-abund ASVs, there are 285,984 reads unassigned to species (out of 1,898,063 reads total)
#after also removing contaminating sequences, there are 265,340 reads unassigned to species (out of 1,562,182 reads total)
# Proportion of all reads that belong to ASVs assigned to species.
# Both the numerator and the denominator are computed from the current table, so the
# proportion stays correct when upstream QC / filtering parameters change the read totals
# (previously the denominator was a hard-coded total that had to be updated by hand).
rbcL.IDs %>%
  summarize(sum = sum(ASVTotalReads[!is.na(Species)]) / sum(ASVTotalReads))
## sum
## 1 0.8107384
Some samples have a LOT of unassigned reads. How many unknown (Unk) ASVs are there per sample?
# Per-sample bars scaled to proportions: share of reads from ASVs with vs. without a species assignment
rbcL.IDs.long %>%
  ggplot(aes(x = Sample, y = Reads, fill = !is.na(Species))) +
  geom_col(position = "fill") +
  labs(x = "", y = "Proportion of Reads") +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
# some samples with a LOT of unassigned reads
# Number of ASVs without a species assignment in each sample (only samples that have at least one)
rbcL.IDs.long %>%
  filter(is.na(Species)) %>%
  group_by(Sample) %>%
  dplyr::summarize(UnkRichness = n()) %>%
  ggplot(aes(x = Sample, y = UnkRichness)) +
  geom_col() +
  labs(x = "", y = "Unk ASV Richness")
# how many Unk ASVs per sample: ASVs without / with a species assignment,
# and the proportion of a sample's ASVs that lack one
rbcL.IDs.long %>%
  group_by(Sample) %>%
  dplyr::summarize(
    UnkRichness = sum(is.na(Species)),
    KnownRichness = sum(!is.na(Species)),
    UnkProp = UnkRichness / (UnkRichness + KnownRichness)
  )
## # A tibble: 232 × 4
## Sample UnkRichness KnownRichness UnkProp
## <chr> <int> <int> <dbl>
## 1 2020_6_16_H1 0 5 0
## 2 2020_6_16_H5 0 5 0
## 3 2020_6_16_H6 0 2 0
## 4 2020_6_17_H2 0 7 0
## 5 2020_6_17_H4 0 5 0
## 6 2020_6_17_H8 0 8 0
## 7 2020_6_18_H3 0 5 0
## 8 2020_6_18_H7 1 2 0.333
## 9 2020_6_18_H9 0 1 0
## 10 2020_6_30_H1 0 7 0
## # ℹ 222 more rows
# Plot, for each sample, the proportion of its ASVs that were not identified to species
rbcL.IDs.long %>%
  group_by(Sample) %>%
  dplyr::summarize(
    UnkRichness = sum(is.na(Species)),
    KnownRichness = sum(!is.na(Species)),
    UnkProp = UnkRichness / (UnkRichness + KnownRichness)
  ) %>%
  ggplot(aes(x = Sample, y = UnkProp)) +
  geom_col() +
  labs(x = "", y = "Proportion of ASVs Unidentified to Species")
# Histogram of total reads for ASVs unassigned to species that have more than 1000 reads (bins of 10,000 reads)
rbcL.IDs %>%
  filter(is.na(Species), ASVTotalReads > 1000) %>%
  select(ASVTotalReads) %>%
  ggplot(aes(ASVTotalReads)) +
  geom_histogram(binwidth = 10000, color = "black")
# Common (>1000 reads) ASVs that were not assigned to species; the row names
# are the ASV sequences. Per the counts printed below this is 26 ASVs
# comprising 277,642 reads.
# NOTE(review): this comment previously said 23 ASVs / 247,220 reads, ~75% of
# 329,977 unassigned reads; those figures do not match the printed output --
# recompute the share of Species = NA reads for the current run.
rbcL.IDs %>%
  filter(is.na(Species) & ASVTotalReads > 1000) %>%
  select(ASVTotalReads) %>%
  head()
## ASVTotalReads
## AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCGGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGACGGGCTTACCAGTCTTGATCGTTATAAGGGACGATGCTACGACATCGAGCCTGTTGCTGGAGAAGAAAATCAATATATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAGGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGGTTCAAAGCCCTACGCGCTCTACGTCTGGAGGATTTGCGAATTCCTGTTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGTATCCAAGTTGAGAGAGATAAATTGAATAAGTATGGTCGCCCCCTATTGGGCTGTACTATTAAACCTAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA 104923
## AACCTGGAGTTCCACCTGAGGAAGCAGGGGCCGCGGTAGCTGCTGAATCTTCTACTGGTACATGGACAACTGTATGGACTGACGGTCTTACCAGTCTTGATCGTTACAAAGGTCGATGCTACCACATCGAGCCTGTTGCTGGAGAAGAAAGTCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTAGGTAATGTGTTTGGGTTCAAGGCCCTGCGCGCTCTACGTCTGGAGGATTTGCGAATCCCTGTTGCTTATGTTAAAACTTTCCAGGGCCCGCCTCATGGTATCCAAGTTGAGAGAGATAAATTGAACAAGTATGGCCGCCCTCTATTGGGATGTACTATAAAACCAAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA 34522
## AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCTGAATCTTCCACTGGGACATGGACAACTGTGTGGACTGACGGGCTTACCAGTCTTGATCGTTACAAAGGACGATGCTACCACATCGAGCCGGTTGCTGGAGAAGAAAATCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTCTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGATTCAAGGCCCTGCGCGCTCTACGTCTGGAGGATTTGCGAATCCCTACTTCTTATTCTAAAACTTTCCAAGGTCCGCCTCATGGCATCCAAGTTGAGAGGGATAAATTAAACAAGTATGGCCGCCCCCTATTAGGATGTACTATTAAACCTAAATTGGGATTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTC 19868
## AACCTGGAGTTCCACCTGAAGAAGCAGGGGCTGCGGTAGCTGCCGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGATGGACTTACCAGCCTTGATCGTTACAAAGGGCGGTGCTACCACATCGAACCCGTTGCTGGAGAAGAAAGTCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGAAATGTATTTGGATTCAAAGCCCTGCGTGCTCTACGTCTGGAAGATCTGCGAATCCCTACTGCTTATACTAAAACTTTCCAAGGCCCGCCTCATGGCATCCAAGTTGAGAGAGATAAATTGAACAAGTATGGTCGTCCCCTATTGGGATGTACTATTAAACCTAAATTGGGGTTATCTGCTAAAAACTATGGTAGAGCAGTTTATGAATGTCTC 18890
## CGCAACCTGGAGTTCCGGCTGAAGAAGCAGGTGCAGCGGTAGCTGCCGAATCTTCCACTGGGACATGGACAACTGTGTGGACCGATGGACTTACCAGTCTTGATCGTTATAAAGGACGCTGCTACCACATCGAACCTGTTGCTGGAGAAGAGACTCAATTTATTGCTTATGTAGCTTATCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACCTCCATTGTAGGTAATGTATTTGGGTTCAAGGCCTTGCGTGCTCTACGTCTGGAAGATTTGCGAATCCCCGTTGCTTATGTTAAAACTTTCCAAGGTCCTCCTCACGGAATCCAAGTTGAGAGAGATAAATTGAACAAATATGGACGTCCCCTATTGGGATGTACTATTAAACCTAAATTGGGTTTATCCGCTAAAAATTACGGTAGAGCAGTTTATGAATGTCTA 16182
## AACCTGGAGTTCCTCCTGAAGAAGCAGGGGCCGCAGTAGCTGCCGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGATGGACTTACCAGCCTTGATCGTTACAAAGGGCGATGCTACCACATCGATGCCGTTCCGGGAGAAGAAAATCAATATATATGTTATGTAGCTTATCCTTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATCGTAGGTAATGTATTTGGGTTCAAAGCCCTGCGCGCTCTACGTCTGGAAGATCTGCGAATCCCTCCTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGGATCCAAGTTGAAAGAGATAAATTGAACAAGTACGGCCGTCCTTTGTTGGGATGTACTATTAAACCTAAATTGGGCTTATCCGCTAAAAACTACGGTAGAGCAGTTTATGAATGTCTT 12438
# Number of ASVs with >1000 total reads and no species assignment.
rbcL.IDs %>%
  filter(is.na(Species), ASVTotalReads > 1000) %>%
  nrow()
## [1] 26
# Total reads held by ASVs with >1000 total reads and no species assignment.
rbcL.IDs %>%
  filter(is.na(Species), ASVTotalReads > 1000) %>%
  with(sum(ASVTotalReads))
## [1] 277642
A mean of ~23% of ASVs per sample are unassigned to species, but only a mean of ~1.6% of ASVs per sample are unassigned to family (see PercNoSpp and PercNoFam in the summary below).
# Summary table "ASVs": one row per sample with
#   SampleTotalReads  total reads in the sample
#   CountASVs         number of ASV rows for the sample
#   ASVs_NoSpp        ASVs with Species == NA
#   ASVs_NoFam        ASVs with Family == NA
#   Families, Genera  distinct Family / Genus values (n_distinct() counts NA as
#                     a value of its own, as before)
#   PercNoSpp, PercNoFam  percent of the sample's ASVs lacking a species /
#                     family assignment
# Every column comes from ONE grouped summarise, so values cannot be attached
# to the wrong sample. The previous version column-bound five separately
# summarised tables by row position (and named one of them `c`, masking
# base::c for readers). The intermediates a, b, c, d, e are no longer created.
ASVs <- rbcL.IDs.long %>%
  group_by(Sample) %>%
  summarise(
    SampleTotalReads = sum(Reads),
    CountASVs = n(),
    ASVs_NoSpp = sum(is.na(Species)),
    ASVs_NoFam = sum(is.na(Family)),
    Families = n_distinct(Family),
    Genera = n_distinct(Genus)
  ) %>%
  as.data.frame() %>% # cbind() used to return a plain data.frame; keep that class
  mutate(
    PercNoSpp = (ASVs_NoSpp / CountASVs) * 100,
    PercNoFam = (ASVs_NoFam / CountASVs) * 100
  )
summary(ASVs)
## Sample SampleTotalReads CountASVs ASVs_NoSpp
## Length:232 Min. : 2 Min. : 1.00 Min. :0.000
## Class :character 1st Qu.: 2475 1st Qu.: 5.00 1st Qu.:1.000
## Mode :character Median : 5975 Median : 8.00 Median :2.000
## Mean : 6734 Mean :10.27 Mean :2.806
## 3rd Qu.: 9668 3rd Qu.:14.00 3rd Qu.:5.000
## Max. :22161 Max. :30.00 Max. :9.000
## ASVs_NoFam Families Genera PercNoSpp
## Min. :0.0000 Min. : 1.000 Min. : 1.000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.: 3.000 1st Qu.: 4.000 1st Qu.:11.11
## Median :0.0000 Median : 5.000 Median : 6.000 Median :25.00
## Mean :0.2543 Mean : 6.181 Mean : 6.767 Mean :23.19
## 3rd Qu.:0.0000 3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.:33.33
## Max. :3.0000 Max. :18.000 Max. :21.000 Max. :66.67
## PercNoFam
## Min. : 0.00
## 1st Qu.: 0.00
## Median : 0.00
## Mean : 1.60
## 3rd Qu.: 0.00
## Max. :16.67
# Percent of each sample's ASVs without a species assignment.
ASVs %>%
  ggplot(aes(x = Sample, y = PercNoSpp)) +
  geom_col()
# Percent of each sample's ASVs without a family assignment.
ASVs %>%
  ggplot(aes(x = Sample, y = PercNoFam)) +
  geom_col()
mean = 6.3 species (range = 1-20 species) per sample
# Species richness per sample: distinct species names among the ASVs that were
# assigned to species.
rbcL.IDs.long %>%
  filter(!is.na(Species)) %>%
  group_by(Sample) %>%
  dplyr::summarize(Richness = n_distinct(Species))
## # A tibble: 232 × 2
## Sample Richness
## <chr> <int>
## 1 2020_6_16_H1 4
## 2 2020_6_16_H5 5
## 3 2020_6_16_H6 2
## 4 2020_6_17_H2 5
## 5 2020_6_17_H4 5
## 6 2020_6_17_H8 5
## 7 2020_6_18_H3 3
## 8 2020_6_18_H7 2
## 9 2020_6_18_H9 1
## 10 2020_6_30_H1 5
## # ℹ 222 more rows
# Distribution (min / quartiles / mean / max) of per-sample species richness.
rbcL.IDs.long %>%
  filter(!is.na(Species)) %>%
  group_by(Sample) %>%
  dplyr::summarize(Richness = n_distinct(Species)) %>%
  summary()
## Sample Richness
## Length:232 Min. : 1.00
## Class :character 1st Qu.: 3.00
## Mode :character Median : 5.00
## Mean : 6.25
## 3rd Qu.: 9.00
## Max. :20.00
# Bar chart of species richness per sample (ASVs without a species assignment
# are excluded before counting).
rbcL.IDs.long %>%
  filter(!is.na(Species)) %>%
  group_by(Sample) %>%
  dplyr::summarize(Richness = n_distinct(Species)) %>%
  ggplot(aes(x = Sample, y = Richness)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1)) +
  labs(x = "", y = "Spp Richness")
# Number of reads per species per sample (stacked bars). The legend is
# suppressed here.
rbcL.IDs.long %>%
  ggplot(aes(x = Sample, y = Reads, fill = Species)) +
  geom_col() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1),
    legend.position = "none"
  ) +
  labs(x = "", y = "Num of Reads")
# Draw only the Species legend of the reads-per-species bar chart on a fresh
# grid page.
grid::grid.newpage()
(ggplot(rbcL.IDs.long, aes(x = Sample, y = Reads, fill = Species)) + geom_col()) %>%
  cowplot::get_legend() %>%
  grid::grid.draw()
## Warning in get_plot_component(plot, "guide-box"): Multiple components found;
## returning the first one. To return all, use `return_all = TRUE`.
# Proportion of reads per species per sample (bars scaled to 1; legend
# suppressed). geom_col() is geom_bar() with stat = "identity".
rbcL.IDs.long %>%
  ggplot(aes(x = Sample, y = Reads, fill = Species)) +
  geom_col(position = "fill") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1),
    legend.position = "none"
  ) +
  labs(x = "", y = "Prop of Reads")
# Build a phyloseq object from the decontaminated ASV table and the rbcL
# taxonomy assignments. taxa_are_rows = FALSE declares that taxa are the
# columns of seqtab.nochim.nocontam. TRUE/FALSE are spelled out because T/F
# are ordinary variables that can be reassigned.
OTU <- otu_table(seqtab.nochim.nocontam, taxa_are_rows = FALSE, errorIfNULL = TRUE)
TAX <- tax_table(taxa.rbcl)
physeq <- phyloseq(OTU, TAX)
physeq
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 251 taxa and 246 samples ]
## tax_table() Taxonomy Table: [ 251 taxa by 7 taxonomic ranks ]
# List the slots of the phyloseq object.
methods::slotNames(physeq)
## [1] "otu_table" "tax_table" "sam_data" "phy_tree" "refseq"
# example of what you can do with phyloseq object, physeq:
# make plots:
# plot_bar(physeq, fill = "Species") # this is basically the same as the plot under 'how many spp per sample?'
# Convert the phyloseq otu_table to a plain matrix for vegan. as.matrix() does
# nothing here; as(x, "matrix") is the S4 coercion and, unlike
# class(OTU) <- "matrix", does not emit the "result will no longer be an S4
# object" warning.
OTU <- as(OTU, "matrix")
# Rarefaction curves, one line per sample (Site). With tidy = TRUE,
# rarecurve() returns a data frame (Site, Sample, Species columns are used
# below) that is piped into ggplot. Axes are limited to 0-30,000 reads and
# 0-30 ASVs; the per-sample colour legend is hidden.
vegan::rarecurve(OTU, step = 50, xlab = "Sample Size", ylab = "Species", label = TRUE, tidy = TRUE) %>%
  ggplot(aes(x = Sample, y = Species, col = Site)) +
  geom_line() +
  labs(x = "Read Depth", y = "ASVs detected", col = "") +
  theme(legend.position = "none") +
  lims(x = c(0, 30000), y = c(0, 30))
## empty rows removed
identify families that are not being identified to species
Using a threshold of 1000 reads to designate ‘abundant’ vs ‘non-abundant’ ASVs, from “How many unassigned ASVs have more than 1000 reads?” ASVs with this many total reads that were nevertheless unassigned to species should be pulled out for further investigation.
# Distribution of per-sample, per-ASV read counts (default binning).
ggplot(rbcL.IDs.long, aes(x = Reads)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Same histogram restricted to rows with fewer than 1000 reads.
rbcL.IDs.long %>%
  filter(Reads < 1000) %>%
  ggplot(aes(x = Reads)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Here we extract the sequences of abundant (more than 1000 reads) but unidentified (Family or Species is NA) ASVs
# Per sample, count the ASV rows with >1000 reads and no species assignment,
# coloured by the family they were assigned to.
rbcL.IDs.long %>%
  filter(Reads > 1000, is.na(Species)) %>%
  ggplot(aes(x = Sample, fill = Family)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))
# Sample-level rows with more than 1000 reads whose ASV has no family
# assignment (1 row in the output below).
rbcL.IDs.long %>%
  filter(Reads > 1000, is.na(Family))
## # A tibble: 1 × 11
## # Groups: Sample [1]
## Kingdom Phylum Class Order Family Genus Species ASVTotalReads Sample Reads
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <int>
## 1 k__Viridip… p__St… c__s… o__L… <NA> <NA> <NA> 10131 Bg007 9559
## # ℹ 1 more variable: SampleTotalReads <int>
# ASVs with >1000 total reads and no family assignment; the first element of
# dim() is the ASV count (2 ASVs).
rbcL.IDs %>%
  filter(ASVTotalReads > 1000, is.na(Family)) %>%
  dim()
## [1] 2 254
# ASVs with >1000 total reads and no species assignment; the first element of
# dim() is the ASV count. The output below shows 26 ASVs (this comment
# previously said 23).
rbcL.IDs %>%
  filter(ASVTotalReads > 1000 & is.na(Species)) %>%
  dim()
## [1] 26 254
# Extract the sequences of abundant unidentified ASVs: the row names of
# rbcL.IDs are the ASV sequences. Show the first six.
rbcL.IDs %>%
  filter(ASVTotalReads > 1000, is.na(Species)) %>%
  rownames() %>%
  head()
## [1] "AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCGGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGACGGGCTTACCAGTCTTGATCGTTATAAGGGACGATGCTACGACATCGAGCCTGTTGCTGGAGAAGAAAATCAATATATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAGGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGGTTCAAAGCCCTACGCGCTCTACGTCTGGAGGATTTGCGAATTCCTGTTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGTATCCAAGTTGAGAGAGATAAATTGAATAAGTATGGTCGCCCCCTATTGGGCTGTACTATTAAACCTAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA"
## [2] "AACCTGGAGTTCCACCTGAGGAAGCAGGGGCCGCGGTAGCTGCTGAATCTTCTACTGGTACATGGACAACTGTATGGACTGACGGTCTTACCAGTCTTGATCGTTACAAAGGTCGATGCTACCACATCGAGCCTGTTGCTGGAGAAGAAAGTCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTAGGTAATGTGTTTGGGTTCAAGGCCCTGCGCGCTCTACGTCTGGAGGATTTGCGAATCCCTGTTGCTTATGTTAAAACTTTCCAGGGCCCGCCTCATGGTATCCAAGTTGAGAGAGATAAATTGAACAAGTATGGCCGCCCTCTATTGGGATGTACTATAAAACCAAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA"
## [3] "AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCTGAATCTTCCACTGGGACATGGACAACTGTGTGGACTGACGGGCTTACCAGTCTTGATCGTTACAAAGGACGATGCTACCACATCGAGCCGGTTGCTGGAGAAGAAAATCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTCTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGATTCAAGGCCCTGCGCGCTCTACGTCTGGAGGATTTGCGAATCCCTACTTCTTATTCTAAAACTTTCCAAGGTCCGCCTCATGGCATCCAAGTTGAGAGGGATAAATTAAACAAGTATGGCCGCCCCCTATTAGGATGTACTATTAAACCTAAATTGGGATTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTC"
## [4] "AACCTGGAGTTCCACCTGAAGAAGCAGGGGCTGCGGTAGCTGCCGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGATGGACTTACCAGCCTTGATCGTTACAAAGGGCGGTGCTACCACATCGAACCCGTTGCTGGAGAAGAAAGTCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGAAATGTATTTGGATTCAAAGCCCTGCGTGCTCTACGTCTGGAAGATCTGCGAATCCCTACTGCTTATACTAAAACTTTCCAAGGCCCGCCTCATGGCATCCAAGTTGAGAGAGATAAATTGAACAAGTATGGTCGTCCCCTATTGGGATGTACTATTAAACCTAAATTGGGGTTATCTGCTAAAAACTATGGTAGAGCAGTTTATGAATGTCTC"
## [5] "CGCAACCTGGAGTTCCGGCTGAAGAAGCAGGTGCAGCGGTAGCTGCCGAATCTTCCACTGGGACATGGACAACTGTGTGGACCGATGGACTTACCAGTCTTGATCGTTATAAAGGACGCTGCTACCACATCGAACCTGTTGCTGGAGAAGAGACTCAATTTATTGCTTATGTAGCTTATCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACCTCCATTGTAGGTAATGTATTTGGGTTCAAGGCCTTGCGTGCTCTACGTCTGGAAGATTTGCGAATCCCCGTTGCTTATGTTAAAACTTTCCAAGGTCCTCCTCACGGAATCCAAGTTGAGAGAGATAAATTGAACAAATATGGACGTCCCCTATTGGGATGTACTATTAAACCTAAATTGGGTTTATCCGCTAAAAATTACGGTAGAGCAGTTTATGAATGTCTA"
## [6] "AACCTGGAGTTCCTCCTGAAGAAGCAGGGGCCGCAGTAGCTGCCGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGATGGACTTACCAGCCTTGATCGTTACAAAGGGCGATGCTACCACATCGATGCCGTTCCGGGAGAAGAAAATCAATATATATGTTATGTAGCTTATCCTTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATCGTAGGTAATGTATTTGGGTTCAAAGCCCTGCGCGCTCTACGTCTGGAAGATCTGCGAATCCCTCCTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGGATCCAAGTTGAAAGAGATAAATTGAACAAGTACGGCCGTCCTTTGTTGGGATGTACTATTAAACCTAAATTGGGCTTATCCGCTAAAAACTACGGTAGAGCAGTTTATGAATGTCTT"
# Sequence of the first abundant ASV without a species assignment.
rbcL.IDs %>%
  filter(ASVTotalReads > 1000, is.na(Species)) %>%
  rownames() %>%
  .[1]
## [1] "AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCGGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGACGGGCTTACCAGTCTTGATCGTTATAAGGGACGATGCTACGACATCGAGCCTGTTGCTGGAGAAGAAAATCAATATATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAGGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGGTTCAAAGCCCTACGCGCTCTACGTCTGGAGGATTTGCGAATTCCTGTTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGTATCCAAGTTGAGAGAGATAAATTGAATAAGTATGGTCGCCCCCTATTGGGCTGTACTATTAAACCTAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA"
#"AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCGGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGACGGGCTTACCAGTCTTGATCGTTATAAGGGACGATGCTACGACATCGAGCCTGTTGCTGGAGAAGAAAATCAATATATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAGGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGGTTCAAAGCCCTACGCGCTCTACGTCTGGAGGATTTGCGAATTCCTGTTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGTATCCAAGTTGAGAGAGATAAATTGAATAAGTATGGTCGCCCCCTATTGGGCTGTACTATTAAACCTAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA"
A BLAST search for the first ASV came back with 100% identity to several Salix species, including S. nigra.
# Taxonomy table (Kingdom through ASVTotalReads) for ASVs with >1000 total
# reads and no species assignment; the sequence row names are omitted.
rbcL.IDs %>%
  select(Kingdom:ASVTotalReads) %>%
  filter(ASVTotalReads > 1000, is.na(Species)) %>%
  knitr::kable(row.names = FALSE)
| Kingdom | Phylum | Class | Order | Family | Genus | Species | ASVTotalReads |
|---|---|---|---|---|---|---|---|
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__rosids_71275 | o__Malpighiales_3646 | f__Salicaceae_3688 | g__Salix_40685 | NA | 104923 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__rosids_71275 | o__Rosales_3744 | f__Rosaceae_3745 | NA | NA | 34522 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__rosids_71275 | o__Fagales_3502 | f__Fagaceae_3503 | NA | NA | 19868 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__asterids_71274 | o__Cornales_41934 | f__Cornaceae_42219 | g__Nyssa_4290 | NA | 18890 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__rosids_71275 | o__Fabales_72025 | f__Fabaceae_3803 | g__Medicago_3877 | NA | 16182 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__asterids_71274 | o__Boraginales_1538097 | f__Boraginaceae_21571 | NA | NA | 12438 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__asterids_71274 | o__Lamiales_4143 | NA | NA | NA | 10131 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__asterids_71274 | o__Asterales_4209 | f__Asteraceae_4210 | NA | NA | 10017 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__undef__5 | o__Ranunculales_41768 | f__Papaveraceae_3465 | g__Dicentra_22680 | NA | 7202 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__asterids_71274 | o__Lamiales_4143 | f__Lamiaceae_4136 | NA | NA | 7021 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__asterids_71274 | o__Dipsacales_4199 | f__Adoxaceae_4206 | g__Viburnum_4204 | NA | 5322 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__rosids_71275 | o__Vitales_403667 | f__Vitaceae_3602 | NA | NA | 5111 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__asterids_71274 | o__Ericales_41945 | f__Ericaceae_4345 | g__Vaccinium_13749 | NA | 3138 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__undef__5 | o__Ranunculales_41768 | f__Papaveraceae_3465 | NA | NA | 3068 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | NA | NA | NA | NA | NA | 2689 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__asterids_71274 | o__Solanales_4069 | f__Solanaceae_4070 | g__Solanum_4107 | NA | 2603 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__rosids_71275 | o__Brassicales_3699 | f__Brassicaceae_3700 | g__Hesperis_264417 | NA | 2467 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__rosids_71275 | o__Malpighiales_3646 | f__Salicaceae_3688 | g__Salix_40685 | NA | 2095 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__Pinidae_3313 | o__Pinales_1446380 | f__Pinaceae_3318 | g__Pinus_3337 | NA | 1515 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__rosids_71275 | o__Rosales_3744 | f__Rosaceae_3745 | g__Spiraea_23224 | NA | 1376 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__Liliopsida_4447 | o__Asparagales_73496 | f__Hyacinthaceae_44985 | NA | NA | 1351 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__rosids_71275 | o__Fabales_72025 | f__Fabaceae_3803 | g__Cercis_49800 | NA | 1195 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__asterids_71274 | o__Lamiales_4143 | f__Lamiaceae_4136 | NA | NA | 1193 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__asterids_71274 | o__Lamiales_4143 | f__Lamiaceae_4136 | g__Lamium_53158 | NA | 1167 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__Liliopsida_4447 | o__Asparagales_73496 | f__Hyacinthaceae_44985 | g__Scilla_4701 | NA | 1148 |
| k__Viridiplantae_33090 | p__Streptophyta_35493 | c__sub__asterids_71274 | o__Apiales_4036 | f__Apiaceae_4037 | NA | NA | 1010 |
# Export the tables produced by this analysis as CSV files.
# The output directory is set once here instead of being repeated in every
# write.csv() call; to write somewhere else, change only `out_dir`.
#   Rivanna (HPC): "/scratch/kls7sg/Bioinformatics/RMarkdown/rbcL_bioinformatics_files"
#   local:         "/Users/kelseyschoenemann/Desktop/Bioinformatics/RMarkdown/rbcL_bioinformatics_files"
out_dir <- "/scratch/kls7sg/Bioinformatics/RMarkdown/rbcL_bioinformatics_files"

# Sequences of all ASVs with >1000 reads that are unassigned to species.
# NOTE(review): the object name `save` shadows base::save(); it is kept so any
# later reference to it still works.
save <- as.data.frame(rownames(rbcL.IDs %>% filter(ASVTotalReads > 1000 & is.na(Species))))
write.csv(save, file = file.path(out_dir, "UnkSpp_rbcL_ASVs.csv"))

# seqtab.nochim.nocontam, for phyloseq object creation
write.csv(seqtab.nochim.nocontam, file = file.path(out_dir, "seqtab.nochim.nocontam.csv"))

# taxa.rbcl, for phyloseq object creation
write.csv(taxa.rbcl, file = file.path(out_dir, "taxa.rbcl.csv"))

# rbcL.IDs
write.csv(rbcL.IDs, file = file.path(out_dir, "rbcL.IDs.csv"))

# rbcL.IDs.long
write.csv(rbcL.IDs.long, file = file.path(out_dir, "rbcL.IDs.long.csv"))